LCOV - code coverage report
Current view: top level - gcc - tree-vect-stmts.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 80.0 % 7273 5822
Test Date: 2026-06-20 15:32:29 Functions: 88.8 % 107 95
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Statement Analysis and Transformation for Vectorization
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              :    and Ira Rosen <irar@il.ibm.com>
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "target.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "gimple.h"
      30              : #include "ssa.h"
      31              : #include "optabs-tree.h"
      32              : #include "insn-config.h"
      33              : #include "recog.h"            /* FIXME: for insn_data */
      34              : #include "cgraph.h"
      35              : #include "dumpfile.h"
      36              : #include "alias.h"
      37              : #include "fold-const.h"
      38              : #include "stor-layout.h"
      39              : #include "tree-eh.h"
      40              : #include "gimplify.h"
      41              : #include "gimple-iterator.h"
      42              : #include "gimplify-me.h"
      43              : #include "tree-cfg.h"
      44              : #include "tree-ssa-loop-manip.h"
      45              : #include "cfgloop.h"
      46              : #include "explow.h"
      47              : #include "tree-ssa-loop.h"
      48              : #include "tree-scalar-evolution.h"
      49              : #include "tree-vectorizer.h"
      50              : #include "builtins.h"
      51              : #include "internal-fn.h"
      52              : #include "tree-vector-builder.h"
      53              : #include "vec-perm-indices.h"
      54              : #include "gimple-range.h"
      55              : #include "tree-ssa-loop-niter.h"
      56              : #include "gimple-fold.h"
      57              : #include "regs.h"
      58              : #include "attribs.h"
      59              : #include "optabs-libfuncs.h"
      60              : #include "tree-dfa.h"
      61              : 
      62              : /* For lang_hooks.types.type_for_mode.  */
      63              : #include "langhooks.h"
      64              : 
      65              : static tree vector_vector_composition_type (tree, poly_uint64, tree *,
      66              :                                             bool = false);
      67              : 
      68              : /* Return TRUE iff the given statement is in an inner loop relative to
      69              :    the loop being vectorized.  */
      70              : bool
      71      5813199 : stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
      72              : {
      73      5813199 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
      74      5813199 :   basic_block bb = gimple_bb (stmt);
      75      5813199 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
      76      2761581 :   class loop* loop;
      77              : 
      78      2761581 :   if (!loop_vinfo)
      79              :     return false;
      80              : 
      81      2761581 :   loop = LOOP_VINFO_LOOP (loop_vinfo);
      82              : 
      83      2761581 :   return (bb->loop_father == loop->inner);
      84              : }
      85              : 
      86              : /* Record the cost of a statement, either by directly informing the
      87              :    target model or by saving it in a vector for later processing.
      88              :    Return a preliminary estimate of the statement's cost.  */
      89              : 
      90              : unsigned
      91      8828558 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
      92              :                   enum vect_cost_for_stmt kind,
      93              :                   stmt_vec_info stmt_info, slp_tree node,
      94              :                   tree vectype, int misalign,
      95              :                   enum vect_cost_model_location where)
      96              : {
      97      8828558 :   if ((kind == vector_load || kind == unaligned_load)
      98      1582005 :       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
      99              :     kind = vector_gather_load;
     100      8828558 :   if ((kind == vector_store || kind == unaligned_store)
     101      1020042 :       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
     102      8828558 :     kind = vector_scatter_store;
     103              : 
     104      8828558 :   stmt_info_for_cost si
     105      8828558 :     = { count, kind, where, stmt_info, node, vectype, misalign };
     106      8828558 :   body_cost_vec->safe_push (si);
     107              : 
     108      8828558 :   return (unsigned)
     109      8828558 :       (builtin_vectorization_cost (kind, vectype, misalign) * count);
     110              : }
     111              : 
     112              : unsigned
     113      3977342 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
     114              :                   enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
     115              :                   tree vectype, int misalign,
     116              :                   enum vect_cost_model_location where)
     117              : {
     118      3977342 :   return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
     119      3977342 :                            vectype, misalign, where);
     120              : }
     121              : 
     122              : unsigned
     123      1787066 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
     124              :                   enum vect_cost_for_stmt kind, slp_tree node,
     125              :                   tree vectype, int misalign,
     126              :                   enum vect_cost_model_location where)
     127              : {
     128      1787066 :   return record_stmt_cost (body_cost_vec, count, kind,
     129              :                            SLP_TREE_REPRESENTATIVE (node), node,
     130      1787066 :                            vectype, misalign, where);
     131              : }
     132              : 
     133              : unsigned
     134            0 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
     135              :                   enum vect_cost_for_stmt kind,
     136              :                   enum vect_cost_model_location where)
     137              : {
     138            0 :   gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
     139              :               || kind == scalar_stmt);
     140            0 :   return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
     141            0 :                            NULL_TREE, 0, where);
     142              : }
     143              : 
     144              : /* Return a variable of type ELEM_TYPE[NELEMS].  */
     145              : 
     146              : static tree
     147            0 : create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
     148              : {
     149            0 :   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
     150            0 :                          "vect_array");
     151              : }
     152              : 
     153              : /* ARRAY is an array of vectors created by create_vector_array.
     154              :    Return an SSA_NAME for the vector in index N.  The reference
     155              :    is part of the vectorization of STMT_INFO and the vector is associated
     156              :    with scalar destination SCALAR_DEST.
     157              :    If we need to ensure that inactive elements are set to zero,
     158              :    NEED_ZEROING is true, MASK contains the loop mask to be used.  */
     159              : 
     160              : static tree
     161            0 : read_vector_array (vec_info *vinfo,
     162              :                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     163              :                    tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
     164              :                    bool need_zeroing, tree mask)
     165              : {
     166            0 :   tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
     167            0 :   gimple *new_stmt;
     168              : 
     169            0 :   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
     170            0 :   vect_type = TREE_TYPE (TREE_TYPE (array));
     171            0 :   tmp = vect_create_destination_var (scalar_dest, vect_type);
     172            0 :   vect = vect_create_destination_var (scalar_dest, vect_type);
     173            0 :   array_ref = build4 (ARRAY_REF, vect_type, array,
     174            0 :                       build_int_cst (size_type_node, n),
     175              :                       NULL_TREE, NULL_TREE);
     176              : 
     177            0 :   new_stmt = gimple_build_assign (tmp, array_ref);
     178            0 :   tmp_name = make_ssa_name (vect, new_stmt);
     179            0 :   gimple_assign_set_lhs (new_stmt, tmp_name);
     180            0 :   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
     181              : 
     182            0 :   if (need_zeroing)
     183              :     {
     184            0 :       tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
     185              :                                               vect_type);
     186            0 :       vect_name = make_ssa_name (vect, new_stmt);
     187            0 :       new_stmt
     188            0 :         = gimple_build_assign (vect_name, VEC_COND_EXPR,
     189              :                                mask, tmp_name, vec_els);
     190            0 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
     191              :     }
     192              :   else
     193              :     vect_name = tmp_name;
     194              : 
     195            0 :   return vect_name;
     196              : }
     197              : 
     198              : /* ARRAY is an array of vectors created by create_vector_array.
     199              :    Emit code to store SSA_NAME VECT in index N of the array.
     200              :    The store is part of the vectorization of STMT_INFO.  */
     201              : 
     202              : static void
     203            0 : write_vector_array (vec_info *vinfo,
     204              :                     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     205              :                     tree vect, tree array, unsigned HOST_WIDE_INT n)
     206              : {
     207            0 :   tree array_ref;
     208            0 :   gimple *new_stmt;
     209              : 
     210            0 :   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
     211            0 :                       build_int_cst (size_type_node, n),
     212              :                       NULL_TREE, NULL_TREE);
     213              : 
     214            0 :   new_stmt = gimple_build_assign (array_ref, vect);
     215            0 :   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
     216            0 : }
     217              : 
     218              : /* PTR is a pointer to an array of type TYPE.  Return a representation
     219              :    of *PTR.  The memory reference replaces those in FIRST_DR
     220              :    (and its group).  */
     221              : 
     222              : static tree
     223            0 : create_array_ref (tree type, tree ptr, tree alias_ptr_type)
     224              : {
     225            0 :   tree mem_ref;
     226              : 
     227            0 :   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
     228              :   /* Arrays have the same alignment as their type.  */
     229            0 :   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
     230            0 :   return mem_ref;
     231              : }
     232              : 
     233              : /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
     234              :    Emit the clobber before *GSI.  */
     235              : 
     236              : static void
     237           15 : vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
     238              :                        gimple_stmt_iterator *gsi, tree var)
     239              : {
     240           15 :   tree clobber = build_clobber (TREE_TYPE (var));
     241           15 :   gimple *new_stmt = gimple_build_assign (var, clobber);
     242           15 :   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
     243           15 : }
     244              : 
     245              : /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
     246              : 
     247              : /* Function vect_mark_relevant.
     248              : 
     249              :    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
     250              : 
     251              : static void
     252      3208957 : vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
     253              :                     enum vect_relevant relevant, bool live_p)
     254              : {
     255      3208957 :   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
     256      3208957 :   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
     257              : 
     258      3208957 :   if (dump_enabled_p ())
     259       164206 :     dump_printf_loc (MSG_NOTE, vect_location,
     260              :                      "mark relevant %d, live %d: %G", relevant, live_p,
     261              :                      stmt_info->stmt);
     262              : 
     263              :   /* If this stmt is an original stmt in a pattern, we might need to mark its
     264              :      related pattern stmt instead of the original stmt.  However, such stmts
     265              :      may have their own uses that are not in any pattern, in such cases the
     266              :      stmt itself should be marked.  */
     267      3208957 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
     268              :     {
     269              :       /* This is the last stmt in a sequence that was detected as a
     270              :          pattern that can potentially be vectorized.  Don't mark the stmt
     271              :          as relevant/live because it's not going to be vectorized.
     272              :          Instead mark the pattern-stmt that replaces it.  */
     273              : 
     274       242378 :       if (dump_enabled_p ())
     275         2811 :         dump_printf_loc (MSG_NOTE, vect_location,
     276              :                          "last stmt in pattern. don't mark"
     277              :                          " relevant/live.\n");
     278              : 
     279       242378 :       stmt_vec_info old_stmt_info = stmt_info;
     280       242378 :       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
     281       242378 :       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
     282       242378 :       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
     283       242378 :       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
     284              : 
     285       242378 :       if (live_p && relevant == vect_unused_in_scope)
     286              :         {
     287          110 :           if (dump_enabled_p ())
     288           10 :             dump_printf_loc (MSG_NOTE, vect_location,
     289              :                              "vec_stmt_relevant_p: forcing live pattern stmt "
     290              :                              "relevant.\n");
     291              :           relevant = vect_used_only_live;
     292              :         }
     293              : 
     294       242378 :       if (dump_enabled_p ())
     295         2811 :         dump_printf_loc (MSG_NOTE, vect_location,
     296              :                          "mark relevant %d, live %d: %G", relevant, live_p,
     297              :                          stmt_info->stmt);
     298              :     }
     299              : 
     300      3208957 :   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
     301      3208957 :   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
     302      2878197 :     STMT_VINFO_RELEVANT (stmt_info) = relevant;
     303              : 
     304      3208957 :   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
     305       330760 :       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
     306              :     {
     307       330045 :       if (dump_enabled_p ())
     308        19616 :         dump_printf_loc (MSG_NOTE, vect_location,
     309              :                          "already marked relevant/live.\n");
     310       330045 :       return;
     311              :     }
     312              : 
     313      2878912 :   worklist->safe_push (stmt_info);
     314              : }
     315              : 
     316              : 
     317              : /* Function is_simple_and_all_uses_invariant
     318              : 
     319              :    Return true if STMT_INFO is simple and all uses of it are invariant.  */
     320              : 
     321              : bool
     322       248134 : is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
     323              :                                   loop_vec_info loop_vinfo)
     324              : {
     325       248134 :   tree op;
     326       248134 :   ssa_op_iter iter;
     327              : 
     328       442052 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
     329       194743 :   if (!stmt)
     330              :     return false;
     331              : 
     332       202298 :   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
     333              :     {
     334       201473 :       enum vect_def_type dt = vect_uninitialized_def;
     335              : 
     336       201473 :       if (!vect_is_simple_use (op, loop_vinfo, &dt))
     337              :         {
     338         5326 :           if (dump_enabled_p ())
     339           16 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     340              :                              "use not simple.\n");
     341       193918 :           return false;
     342              :         }
     343              : 
     344       196147 :       if (dt != vect_external_def && dt != vect_constant_def)
     345              :         return false;
     346              :     }
     347              :   return true;
     348              : }
     349              : 
     350              : /* Function vect_stmt_relevant_p.
     351              : 
     352              :    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
     353              :    is "relevant for vectorization".
     354              : 
     355              :    A stmt is considered "relevant for vectorization" if:
     356              :    - it has uses outside the loop.
     357              :    - it has vdefs (it alters memory).
     358              :    - control stmts in the loop (except for the exit condition).
     359              : 
     360              :    CHECKME: what other side effects would the vectorizer allow?  */
     361              : 
     362              : static bool
     363      5159901 : vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
     364              :                       enum vect_relevant *relevant, bool *live_p)
     365              : {
     366      5159901 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     367      5159901 :   ssa_op_iter op_iter;
     368      5159901 :   imm_use_iterator imm_iter;
     369      5159901 :   use_operand_p use_p;
     370      5159901 :   def_operand_p def_p;
     371              : 
     372      5159901 :   *relevant = vect_unused_in_scope;
     373      5159901 :   *live_p = false;
     374              : 
     375              :   /* cond stmt other than loop exit cond.  */
     376      5159901 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
     377      5159901 :   if (is_ctrl_stmt (stmt)
     378       605599 :       && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
     379      5388870 :       && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
     380       226984 :     *relevant = vect_used_in_scope;
     381              : 
     382              :   /* changing memory.  */
     383      5159901 :   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
     384      4279790 :     if (gimple_vdef (stmt_info->stmt)
     385      3674191 :         && !gimple_clobber_p (stmt_info->stmt))
     386              :       {
     387       369431 :         if (dump_enabled_p ())
     388        28039 :           dump_printf_loc (MSG_NOTE, vect_location,
     389              :                            "vec_stmt_relevant_p: stmt has vdefs.\n");
     390       369431 :         *relevant = vect_used_in_scope;
     391       369431 :         if (! STMT_VINFO_DATA_REF (stmt_info)
     392       369431 :             && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
     393           20 :           LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
     394              :       }
     395              : 
     396              :   /* uses outside the loop.  */
     397     14498192 :   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
     398              :     {
     399     15359063 :       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
     400              :         {
     401      7002283 :           basic_block bb = gimple_bb (USE_STMT (use_p));
     402      7002283 :           if (!flow_bb_inside_loop_p (loop, bb))
     403              :             {
     404       262889 :               if (is_gimple_debug (USE_STMT (use_p)))
     405         1093 :                 continue;
     406              : 
     407       261796 :               if (dump_enabled_p ())
     408         5987 :                 dump_printf_loc (MSG_NOTE, vect_location,
     409              :                                  "vec_stmt_relevant_p: used out of loop.\n");
     410              : 
     411              :               /* We expect all such uses to be in the loop exit phis
     412              :                  (because of loop closed form)   */
     413       261796 :               gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
     414              : 
     415       261796 :               *live_p = true;
     416       261796 :               LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
     417              :             }
     418      4178390 :         }
     419              :     }
     420              : 
     421       248136 :   if (*live_p && *relevant == vect_unused_in_scope
     422      5408035 :       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
     423              :     {
     424       247309 :       if (dump_enabled_p ())
     425         5843 :         dump_printf_loc (MSG_NOTE, vect_location,
     426              :                          "vec_stmt_relevant_p: stmt live but not relevant.\n");
     427       247309 :       *relevant = vect_used_only_live;
     428              :     }
     429              : 
     430      5159901 :   return (*live_p || *relevant);
     431              : }
     432              : 
     433              : 
     434              : /* Function exist_non_indexing_operands_for_use_p
     435              : 
     436              :    USE is one of the uses attached to STMT_INFO.  Check if USE is
     437              :    used in STMT_INFO for anything other than indexing an array.  */
     438              : 
     439              : static bool
     440      4301722 : exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
     441              : {
     442      4301722 :   tree operand;
     443              : 
     444              :   /* USE corresponds to some operand in STMT.  If there is no data
     445              :      reference in STMT, then any operand that corresponds to USE
     446              :      is not indexing an array.  */
     447      4301722 :   if (!STMT_VINFO_DATA_REF (stmt_info))
     448              :     return true;
     449              : 
     450              :   /* STMT has a data_ref. FORNOW this means that its of one of
     451              :      the following forms:
     452              :      -1- ARRAY_REF = var
     453              :      -2- var = ARRAY_REF
     454              :      (This should have been verified in analyze_data_refs).
     455              : 
     456              :      'var' in the second case corresponds to a def, not a use,
     457              :      so USE cannot correspond to any operands that are not used
     458              :      for array indexing.
     459              : 
     460              :      Therefore, all we need to check is if STMT falls into the
     461              :      first case, and whether var corresponds to USE.  */
     462              : 
     463      1469152 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
     464      1450946 :   if (!assign || !gimple_assign_copy_p (assign))
     465              :     {
     466       787144 :       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
     467        18206 :       if (call && gimple_call_internal_p (call))
     468              :         {
     469        18206 :           internal_fn ifn = gimple_call_internal_fn (call);
     470        18206 :           int mask_index = internal_fn_mask_index (ifn);
     471        18206 :           if (mask_index >= 0
     472        18206 :               && use == gimple_call_arg (call, mask_index))
     473              :             return true;
     474        11817 :           int els_index = internal_fn_else_index (ifn);
     475        11817 :           if (els_index >= 0
     476        11817 :               && use == gimple_call_arg (call, els_index))
     477              :             return true;
     478        10312 :           int stored_value_index = internal_fn_stored_value_index (ifn);
     479        10312 :           if (stored_value_index >= 0
     480        10312 :               && use == gimple_call_arg (call, stored_value_index))
     481              :             return true;
     482         8090 :           if (internal_gather_scatter_fn_p (ifn)
     483         8090 :               && use == gimple_call_arg (call, 1))
     484              :             return true;
     485              :         }
     486       777028 :       return false;
     487              :     }
     488              : 
     489       682008 :   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
     490              :     return false;
     491       682008 :   operand = gimple_assign_rhs1 (assign);
     492       682008 :   if (TREE_CODE (operand) != SSA_NAME)
     493              :     return false;
     494              : 
     495       590609 :   if (operand == use)
     496              :     return true;
     497              : 
     498              :   return false;
     499              : }
     500              : 
     501              : 
     502              : /*
     503              :    Function process_use.
     504              : 
     505              :    Inputs:
     506              :    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
     507              :    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     508              :      that defined USE.  This is done by calling mark_relevant and passing it
     509              :      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
     510              :    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     511              :      be performed.
     512              : 
     513              :    Outputs:
     514              :    Generally, LIVE_P and RELEVANT are used to define the liveness and
     515              :    relevance info of the DEF_STMT of this USE:
     516              :        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
     517              :        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
     518              :    Exceptions:
     519              :    - case 1: If USE is used only for address computations (e.g. array indexing),
     520              :    which does not need to be directly vectorized, then the liveness/relevance
     521              :    of the respective DEF_STMT is left unchanged.
     522              :    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     523              :    we skip DEF_STMT cause it had already been processed.
     524              :    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     525              :    "relevant" will be modified accordingly.
     526              : 
     527              :    Return true if everything is as expected. Return false otherwise.  */
     528              : 
     529              : static opt_result
     530      4357685 : process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
     531              :              enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
     532              :              bool force)
     533              : {
     534      4357685 :   stmt_vec_info dstmt_vinfo;
     535      4357685 :   enum vect_def_type dt;
     536              : 
     537              :   /* case 1: we are only interested in uses that need to be vectorized.  Uses
     538              :      that are used for address computation are not considered relevant.  */
     539      4357685 :   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
     540      1180077 :     return opt_result::success ();
     541              : 
     542      3177608 :   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
     543        34802 :     return opt_result::failure_at (stmt_vinfo->stmt,
     544              :                                    "not vectorized:"
     545              :                                    " unsupported use in stmt.\n");
     546              : 
     547      3142806 :   if (!dstmt_vinfo)
     548       590743 :     return opt_result::success ();
     549              : 
     550      2552063 :   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
     551      2552063 :   basic_block bb = gimple_bb (stmt_vinfo->stmt);
     552              : 
     553              :   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     554              :      We have to force the stmt live since the epilogue loop needs it to
     555              :      continue computing the reduction.  */
     556      2552063 :   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
     557       268774 :       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
     558        84837 :       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
     559        84837 :       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
     560      2636900 :       && bb->loop_father == def_bb->loop_father)
     561              :     {
     562        84837 :       if (dump_enabled_p ())
     563         3930 :         dump_printf_loc (MSG_NOTE, vect_location,
     564              :                          "reduc-stmt defining reduc-phi in the same nest.\n");
     565        84837 :       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
     566        84837 :       return opt_result::success ();
     567              :     }
     568              : 
     569              :   /* case 3a: outer-loop stmt defining an inner-loop stmt:
     570              :         outer-loop-header-bb:
     571              :                 d = dstmt_vinfo
     572              :         inner-loop:
     573              :                 stmt # use (d)
     574              :         outer-loop-tail-bb:
     575              :                 ...               */
     576      2467226 :   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
     577              :     {
     578         2237 :       if (dump_enabled_p ())
     579          321 :         dump_printf_loc (MSG_NOTE, vect_location,
     580              :                          "outer-loop def-stmt defining inner-loop stmt.\n");
     581              : 
     582         2237 :       switch (relevant)
     583              :         {
     584            0 :         case vect_unused_in_scope:
     585            0 :           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
     586              :                       vect_used_in_scope : vect_unused_in_scope;
     587              :           break;
     588              : 
     589          776 :         case vect_used_in_outer_by_reduction:
     590          776 :           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
     591              :           relevant = vect_used_by_reduction;
     592              :           break;
     593              : 
     594         1181 :         case vect_used_in_outer:
     595         1181 :           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
     596              :           relevant = vect_used_in_scope;
     597              :           break;
     598              : 
     599              :         case vect_used_in_scope:
     600              :           break;
     601              : 
     602            0 :         default:
     603            0 :           gcc_unreachable ();
     604              :         }
     605              :     }
     606              : 
     607              :   /* case 3b: inner-loop stmt defining an outer-loop stmt:
     608              :         outer-loop-header-bb:
     609              :                 ...
     610              :         inner-loop:
     611              :                 d = dstmt_vinfo
     612              :         outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
     613              :                 stmt # use (d)          */
     614      2464989 :   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
     615              :     {
     616         2100 :       if (dump_enabled_p ())
     617          626 :         dump_printf_loc (MSG_NOTE, vect_location,
     618              :                          "inner-loop def-stmt defining outer-loop stmt.\n");
     619              : 
     620         2100 :       switch (relevant)
     621              :         {
     622            0 :         case vect_unused_in_scope:
     623            0 :           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
     624            0 :             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
     625              :                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
     626              :           break;
     627              : 
     628              :         case vect_used_by_reduction:
     629              :         case vect_used_only_live:
     630              :           relevant = vect_used_in_outer_by_reduction;
     631              :           break;
     632              : 
     633              :         case vect_used_in_scope:
     634      2289659 :           relevant = vect_used_in_outer;
     635              :           break;
     636              : 
     637            0 :         default:
     638            0 :           gcc_unreachable ();
     639              :         }
     640              :     }
     641              :   /* We are also not interested in uses on loop PHI backedges that are
     642              :      inductions.  Otherwise we'll needlessly vectorize the IV increment
     643              :      and cause hybrid SLP for SLP inductions.  */
     644      2462889 :   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
     645       180603 :            && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
     646      2640456 :            && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
     647              :                                       loop_latch_edge (bb->loop_father))
     648              :                == use))
     649              :     {
     650       177567 :       if (dump_enabled_p ())
     651         4868 :         dump_printf_loc (MSG_NOTE, vect_location,
     652              :                          "induction value on backedge.\n");
     653       177567 :       return opt_result::success ();
     654              :     }
     655              : 
     656      2289659 :   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
     657      2289659 :   return opt_result::success ();
     658              : }
     659              : 
     660              : 
     661              : /* Function vect_mark_stmts_to_be_vectorized.
     662              : 
     663              :    Not all stmts in the loop need to be vectorized. For example:
     664              : 
     665              :      for i...
     666              :        for j...
     667              :    1.    T0 = i + j
     668              :    2.    T1 = a[T0]
     669              : 
     670              :    3.    j = j + 1
     671              : 
     672              :    Stmt 1 and 3 do not need to be vectorized, because loop control and
     673              :    addressing of vectorized data-refs are handled differently.
     674              : 
     675              :    This pass detects such stmts.  */
     676              : 
     677              : opt_result
     678       432442 : vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
     679              : {
     680       432442 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     681       432442 :   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
     682       432442 :   unsigned int nbbs = loop->num_nodes;
     683       432442 :   gimple_stmt_iterator si;
     684       432442 :   unsigned int i;
     685       432442 :   basic_block bb;
     686       432442 :   bool live_p;
     687       432442 :   enum vect_relevant relevant;
     688              : 
     689       432442 :   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
     690              : 
     691       432442 :   auto_vec<stmt_vec_info, 64> worklist;
     692              : 
     693              :   /* 1. Init worklist.  */
     694      1464245 :   for (i = 0; i < nbbs; i++)
     695              :     {
     696      1042035 :       bb = bbs[i];
     697      2142236 :       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
     698              :         {
     699      2220578 :           if (virtual_operand_p (gimple_phi_result (gsi_stmt (si))))
     700       230178 :             continue;
     701       880111 :           stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
     702       880111 :           if (dump_enabled_p ())
     703        41647 :             dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
     704              :                              phi_info->stmt);
     705              : 
     706       880111 :           if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
     707              :             {
     708        44509 :               if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type)
     709        10088 :                 return opt_result::failure_at
     710        10088 :                   (*si, "not vectorized: unhandled relevant PHI: %G", *si);
     711        34421 :               vect_mark_relevant (&worklist, phi_info, relevant, live_p);
     712              :             }
     713              :         }
     714      8252656 :       for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si))
     715              :         {
     716      7220853 :           gimple *stmt = gsi_stmt (si);
     717      7220853 :           if (is_gimple_debug (stmt))
     718      2940919 :             continue;
     719      4279934 :           stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
     720      4279934 :           if (dump_enabled_p ())
     721       223156 :               dump_printf_loc (MSG_NOTE, vect_location,
     722              :                                "init: stmt relevant? %G", stmt);
     723              : 
     724      4279934 :           if (gimple_get_lhs (stmt) == NULL_TREE
     725       611898 :               && !is_a <gcond *> (stmt)
     726      4286233 :               && !is_a <gcall *> (stmt))
     727          144 :             return opt_result::failure_at
     728          144 :                 (stmt, "not vectorized: irregular stmt: %G", stmt);
     729              : 
     730      4279790 :           if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
     731       800040 :             vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
     732              :         }
     733              :     }
     734              : 
     735              :   /* 2. Process worklist.  */
     736      3177624 :   while (worklist.length () > 0)
     737              :     {
     738      2790218 :       use_operand_p use_p;
     739      2790218 :       ssa_op_iter iter;
     740              : 
     741      2790218 :       stmt_vec_info stmt_vinfo = worklist.pop ();
     742      2790218 :       if (dump_enabled_p ())
     743       143950 :         dump_printf_loc (MSG_NOTE, vect_location,
     744              :                          "worklist: examine stmt: %G", stmt_vinfo->stmt);
     745              : 
     746              :       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
     747              :          (DEF_STMT) as relevant/irrelevant according to the relevance property
     748              :          of STMT.  */
     749      2790218 :       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
     750              : 
     751              :       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
     752              :          propagated as is to the DEF_STMTs of its USEs.
     753              : 
     754              :          One exception is when STMT has been identified as defining a reduction
     755              :          variable; in this case we set the relevance to vect_used_by_reduction.
     756              :          This is because we distinguish between two kinds of relevant stmts -
     757              :          those that are used by a reduction computation, and those that are
     758              :          (also) used by a regular computation.  This allows us later on to
     759              :          identify stmts that are used solely by a reduction, and therefore the
     760              :          order of the results that they produce does not have to be kept.  */
     761              : 
     762      2790218 :       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
     763              :         {
     764       171769 :           case vect_reduction_def:
     765       171769 :             gcc_assert (relevant != vect_unused_in_scope);
     766       171769 :             if (relevant != vect_unused_in_scope
     767       171769 :                 && relevant != vect_used_in_scope
     768       171769 :                 && relevant != vect_used_by_reduction
     769       171769 :                 && relevant != vect_used_only_live)
     770            0 :               return opt_result::failure_at
     771            0 :                 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
     772              :             break;
     773              : 
     774         2209 :           case vect_nested_cycle:
     775         2209 :             if (relevant != vect_unused_in_scope
     776         2209 :                 && relevant != vect_used_in_outer_by_reduction
     777         1614 :                 && relevant != vect_used_in_outer)
     778            2 :               return opt_result::failure_at
     779            2 :                 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
     780              :             break;
     781              : 
     782         1209 :           case vect_double_reduction_def:
     783         1209 :             if (relevant != vect_unused_in_scope
     784         1209 :                 && relevant != vect_used_by_reduction
     785          409 :                 && relevant != vect_used_only_live)
     786            0 :               return opt_result::failure_at
     787            0 :                 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
     788              :             break;
     789              : 
     790              :           default:
     791              :             break;
     792              :         }
     793              : 
     794      2790216 :       if (is_pattern_stmt_p (stmt_vinfo))
     795              :         {
     796              :           /* Pattern statements are not inserted into the code, so
     797              :              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
     798              :              have to scan the RHS or function arguments instead.  */
     799       624764 :           if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
     800              :             {
     801       406979 :               enum tree_code rhs_code = gimple_assign_rhs_code (assign);
     802       406979 :               tree op = gimple_assign_rhs1 (assign);
     803              : 
     804       406979 :               i = 1;
     805       406979 :               if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
     806              :                 {
     807            0 :                   opt_result res
     808            0 :                     = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
     809              :                                    loop_vinfo, relevant, &worklist, false);
     810            0 :                   if (!res)
     811            0 :                     return res;
     812            0 :                   res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
     813              :                                      loop_vinfo, relevant, &worklist, false);
     814            0 :                   if (!res)
     815            0 :                     return res;
     816              :                   i = 2;
     817              :                 }
     818      1171658 :               for (; i < gimple_num_ops (assign); i++)
     819              :                 {
     820       768433 :                   op = gimple_op (assign, i);
     821       768433 :                   if (TREE_CODE (op) == SSA_NAME)
     822              :                     {
     823       584446 :                       opt_result res
     824       584446 :                         = process_use (stmt_vinfo, op, loop_vinfo, relevant,
     825              :                                        &worklist, false);
     826       584446 :                       if (!res)
     827         3754 :                         return res;
     828              :                     }
     829              :                  }
     830              :             }
     831       217785 :           else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
     832              :             {
     833       211158 :               tree_code rhs_code = gimple_cond_code (cond);
     834       211158 :               gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
     835       211158 :               opt_result res
     836       211158 :                 = process_use (stmt_vinfo, gimple_cond_lhs (cond),
     837              :                                loop_vinfo, relevant, &worklist, false);
     838       211158 :               if (!res)
     839        34804 :                 return res;
     840       211158 :               res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
     841              :                                 loop_vinfo, relevant, &worklist, false);
     842       211158 :               if (!res)
     843            0 :                 return res;
     844              :             }
     845         6627 :           else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
     846              :             {
     847        31687 :               for (i = 0; i < gimple_call_num_args (call); i++)
     848              :                 {
     849        25060 :                   tree arg = gimple_call_arg (call, i);
     850        25060 :                   opt_result res
     851        25060 :                     = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
     852              :                                    &worklist, false);
     853        25060 :                   if (!res)
     854            0 :                     return res;
     855              :                 }
     856              :             }
     857              :           else
     858            0 :             gcc_unreachable ();
     859              :         }
     860              :       else
     861      7582949 :         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
     862              :           {
     863      3269900 :             tree op = USE_FROM_PTR (use_p);
     864      3269900 :             opt_result res
     865      3269900 :               = process_use (stmt_vinfo, op, loop_vinfo, relevant,
     866              :                              &worklist, false);
     867      3269900 :             if (!res)
     868        17855 :               return res;
     869              :           }
     870              : 
     871      2768607 :       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
     872              :         {
     873        55963 :           gather_scatter_info gs_info;
     874        55963 :           if (!vect_check_gather_scatter (stmt_vinfo,
     875              :                                           STMT_VINFO_VECTYPE (stmt_vinfo),
     876              :                                           loop_vinfo, &gs_info))
     877            0 :             gcc_unreachable ();
     878        55963 :           opt_result res
     879        55963 :             = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
     880              :                            &worklist, true);
     881        55963 :           if (!res)
     882              :             {
     883        13193 :               if (fatal)
     884        13193 :                 *fatal = false;
     885        13193 :               return res;
     886              :             }
     887              :         }
     888              :     } /* while worklist */
     889              : 
     890       387406 :   return opt_result::success ();
     891       432442 : }
     892              : 
     893              : /* Function vect_model_simple_cost.
     894              : 
     895              :    Models cost for simple operations, i.e. those that only emit N operations
     896              :    of the same KIND.  */
     897              : 
     898              : static void
     899       773633 : vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node,
     900              :                         stmt_vector_for_cost *cost_vec,
     901              :                         vect_cost_for_stmt kind = vector_stmt)
     902              : {
     903       773633 :   int inside_cost = 0, prologue_cost = 0;
     904              : 
     905       773633 :   gcc_assert (cost_vec != NULL);
     906              : 
     907       773633 :   n *= vect_get_num_copies (vinfo, node);
     908              : 
     909              :   /* Pass the inside-of-loop statements to the target-specific cost model.  */
     910       773633 :   inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body);
     911              : 
     912       773633 :   if (dump_enabled_p ())
     913        33237 :     dump_printf_loc (MSG_NOTE, vect_location,
     914              :                      "vect_model_simple_cost: inside_cost = %d, "
     915              :                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
     916       773633 : }
     917              : 
     918              : 
     919              : /* Model cost for type demotion and promotion operations.  PWR is
     920              :    normally zero for single-step promotions and demotions.  It will be
     921              :    one if two-step promotion/demotion is required, and so on.  NCOPIES
     922              :    is the number of vector results (and thus number of instructions)
     923              :    for the narrowest end of the operation chain.  Each additional
     924              :    step doubles the number of instructions required.  If WIDEN_ARITH
     925              :    is true the stmt is doing widening arithmetic.  */
     926              : 
     927              : static void
     928        68423 : vect_model_promotion_demotion_cost (slp_tree slp_node,
     929              :                                     unsigned int ncopies, int pwr,
     930              :                                     stmt_vector_for_cost *cost_vec,
     931              :                                     bool widen_arith)
     932              : {
     933        68423 :   int i;
     934        68423 :   int inside_cost = 0, prologue_cost = 0;
     935              : 
     936       159665 :   for (i = 0; i < pwr + 1; i++)
     937              :     {
     938       180766 :       inside_cost += record_stmt_cost (cost_vec, ncopies,
     939              :                                        widen_arith
     940              :                                        ? vector_stmt : vec_promote_demote,
     941              :                                        slp_node, 0, vect_body);
     942        91242 :       ncopies *= 2;
     943              :     }
     944              : 
     945        68423 :   if (dump_enabled_p ())
     946         6384 :     dump_printf_loc (MSG_NOTE, vect_location,
     947              :                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
     948              :                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
     949        68423 : }
     950              : 
     951              : /* Returns true if the current function returns DECL.  */
     952              : 
     953              : static bool
     954       557674 : cfun_returns (tree decl)
     955              : {
     956       557674 :   edge_iterator ei;
     957       557674 :   edge e;
     958      1098083 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
     959              :     {
     960      1104162 :       greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
     961       552081 :       if (!ret)
     962            0 :         continue;
     963       552081 :       if (gimple_return_retval (ret) == decl)
     964              :         return true;
     965              :       /* We often end up with an aggregate copy to the result decl,
     966              :          handle that case as well.  First skip intermediate clobbers
     967              :          though.  */
     968              :       gimple *def = ret;
     969      1666124 :       do
     970              :         {
     971      3332248 :           def = SSA_NAME_DEF_STMT (gimple_vuse (def));
     972              :         }
     973      1666124 :       while (gimple_clobber_p (def));
     974       541110 :       if (is_a <gassign *> (def)
     975        61468 :           && gimple_assign_lhs (def) == gimple_return_retval (ret)
     976       548166 :           && gimple_assign_rhs1 (def) == decl)
     977              :         return true;
     978              :     }
     979              :   return false;
     980              : }
     981              : 
     982              : /* Calculate cost of the vector store for DR's memory access.  */
     983              : void
     984      1010004 : vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
     985              :                      int ncopies, dr_alignment_support alignment_support_scheme,
     986              :                      int misalignment,
     987              :                      unsigned int *inside_cost,
     988              :                      stmt_vector_for_cost *body_cost_vec)
     989              : {
     990      1010004 :   tree vectype
     991      1010004 :     = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
     992      1010004 :   switch (alignment_support_scheme)
     993              :     {
     994       552072 :     case dr_aligned:
     995       552072 :       {
     996       552072 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
     997              :                                           vector_store, stmt_info, slp_node,
     998              :                                           vectype, 0, vect_body);
     999              : 
    1000       552072 :         if (dump_enabled_p ())
    1001        14515 :           dump_printf_loc (MSG_NOTE, vect_location,
    1002              :                            "vect_model_store_cost: aligned.\n");
    1003              :         break;
    1004              :       }
    1005              : 
    1006       457932 :     case dr_unaligned_supported:
    1007       457932 :       {
    1008              :         /* Here, we assign an additional cost for the unaligned store.  */
    1009       457932 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
    1010              :                                           unaligned_store, stmt_info, slp_node,
    1011              :                                           vectype, misalignment, vect_body);
    1012       457932 :         if (dump_enabled_p ())
    1013        12920 :           dump_printf_loc (MSG_NOTE, vect_location,
    1014              :                            "vect_model_store_cost: unaligned supported by "
    1015              :                            "hardware.\n");
    1016              :         break;
    1017              :       }
    1018              : 
    1019            0 :     case dr_unaligned_unsupported:
    1020            0 :       {
    1021            0 :         *inside_cost = VECT_MAX_COST;
    1022              : 
    1023            0 :         if (dump_enabled_p ())
    1024            0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1025              :                            "vect_model_store_cost: unsupported access.\n");
    1026              :         break;
    1027              :       }
    1028              : 
    1029            0 :     default:
    1030            0 :       gcc_unreachable ();
    1031              :     }
    1032      1010004 : }
    1033              : 
    1034              : /* Calculate cost of the vector load for DR's memory access.  */
    1035              : void
    1036       923672 : vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
    1037              :                     int ncopies, dr_alignment_support alignment_support_scheme,
    1038              :                     int misalignment,
    1039              :                     bool add_realign_cost, unsigned int *inside_cost,
    1040              :                     unsigned int *prologue_cost,
    1041              :                     stmt_vector_for_cost *prologue_cost_vec,
    1042              :                     stmt_vector_for_cost *body_cost_vec,
    1043              :                     bool record_prologue_costs)
    1044              : {
    1045       923672 :   tree vectype
    1046       923672 :     = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
    1047       923672 :   switch (alignment_support_scheme)
    1048              :     {
    1049       525419 :     case dr_aligned:
    1050       525419 :       {
    1051       525419 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
    1052              :                                           stmt_info, slp_node, vectype,
    1053              :                                           0, vect_body);
    1054              : 
    1055       525419 :         if (dump_enabled_p ())
    1056        18859 :           dump_printf_loc (MSG_NOTE, vect_location,
    1057              :                            "vect_model_load_cost: aligned.\n");
    1058              : 
    1059              :         break;
    1060              :       }
    1061       342294 :     case dr_unaligned_supported:
    1062       342294 :       {
    1063              :         /* Here, we assign an additional cost for the unaligned load.  */
    1064       342294 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
    1065              :                                           unaligned_load, stmt_info, slp_node,
    1066              :                                           vectype, misalignment, vect_body);
    1067              : 
    1068       342294 :         if (dump_enabled_p ())
    1069        22307 :           dump_printf_loc (MSG_NOTE, vect_location,
    1070              :                            "vect_model_load_cost: unaligned supported by "
    1071              :                            "hardware.\n");
    1072              : 
    1073              :         break;
    1074              :       }
    1075            0 :     case dr_explicit_realign:
    1076            0 :       {
    1077            0 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
    1078              :                                           vector_load, stmt_info, slp_node,
    1079              :                                           vectype, 0, vect_body);
    1080            0 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
    1081              :                                           vec_perm, stmt_info, slp_node,
    1082              :                                           vectype, 0, vect_body);
    1083              : 
    1084              :         /* FIXME: If the misalignment remains fixed across the iterations of
    1085              :            the containing loop, the following cost should be added to the
    1086              :            prologue costs.  */
    1087            0 :         if (targetm.vectorize.builtin_mask_for_load)
    1088            0 :           *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
    1089              :                                             stmt_info, slp_node, vectype,
    1090              :                                             0, vect_body);
    1091              : 
    1092            0 :         if (dump_enabled_p ())
    1093            0 :           dump_printf_loc (MSG_NOTE, vect_location,
    1094              :                            "vect_model_load_cost: explicit realign\n");
    1095              : 
    1096              :         break;
    1097              :       }
    1098            0 :     case dr_explicit_realign_optimized:
    1099            0 :       {
    1100            0 :         if (dump_enabled_p ())
    1101            0 :           dump_printf_loc (MSG_NOTE, vect_location,
    1102              :                            "vect_model_load_cost: unaligned software "
    1103              :                            "pipelined.\n");
    1104              : 
    1105              :         /* Unaligned software pipeline has a load of an address, an initial
    1106              :            load, and possibly a mask operation to "prime" the loop.  However,
    1107              :            if this is an access in a group of loads, which provide grouped
    1108              :            access, then the above cost should only be considered for one
    1109              :            access in the group.  Inside the loop, there is a load op
    1110              :            and a realignment op.  */
    1111              : 
    1112            0 :         if (add_realign_cost && record_prologue_costs)
    1113              :           {
    1114            0 :             *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
    1115              :                                                 vector_stmt, stmt_info,
    1116              :                                                 slp_node, vectype,
    1117              :                                                 0, vect_prologue);
    1118            0 :             if (targetm.vectorize.builtin_mask_for_load)
    1119            0 :               *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
    1120              :                                                   vector_stmt, stmt_info,
    1121              :                                                   slp_node, vectype,
    1122              :                                                   0, vect_prologue);
    1123              :           }
    1124              : 
    1125            0 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
    1126              :                                           stmt_info, slp_node, vectype,
    1127              :                                           0, vect_body);
    1128            0 :         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
    1129              :                                           stmt_info, slp_node, vectype,
    1130              :                                           0, vect_body);
    1131              : 
    1132            0 :         if (dump_enabled_p ())
    1133            0 :           dump_printf_loc (MSG_NOTE, vect_location,
    1134              :                            "vect_model_load_cost: explicit realign optimized"
    1135              :                            "\n");
    1136              : 
    1137              :         break;
    1138              :       }
    1139              : 
    1140        55959 :     case dr_unaligned_unsupported:
    1141        55959 :       {
    1142        55959 :         *inside_cost = VECT_MAX_COST;
    1143              : 
    1144        55959 :         if (dump_enabled_p ())
    1145          104 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1146              :                            "vect_model_load_cost: unsupported access.\n");
    1147              :         break;
    1148              :       }
    1149              : 
    1150            0 :     default:
    1151            0 :       gcc_unreachable ();
    1152              :     }
    1153       923672 : }
    1154              : 
    1155              : /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
    1156              :    the loop preheader for the vectorized stmt STMT_VINFO.  */
    1157              : 
    1158              : static void
    1159         6645 : vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
    1160              :                     gimple_stmt_iterator *gsi)
    1161              : {
    1162         6645 :   if (gsi)
    1163         3346 :     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
    1164              :   else
    1165         3299 :     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
    1166              : 
    1167         6645 :   if (dump_enabled_p ())
    1168         1815 :     dump_printf_loc (MSG_NOTE, vect_location,
    1169              :                      "created new init_stmt: %G", new_stmt);
    1170         6645 : }
    1171              : 
    1172              : /* Function vect_init_vector.
    1173              : 
    1174              :    Insert a new stmt (INIT_STMT) that initializes a new variable of type
    1175              :    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
    1176              :    vector type a vector with all elements equal to VAL is created first.
    1177              :    Place the initialization at GSI if it is not NULL.  Otherwise, place the
    1178              :    initialization at the loop preheader.
    1179              :    Return the DEF of INIT_STMT.
    1180              :    It will be used in the vectorization of STMT_INFO.  */
    1181              : 
    1182              : tree
    1183         4926 : vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
    1184              :                   gimple_stmt_iterator *gsi)
    1185              : {
    1186         4926 :   gimple *init_stmt;
    1187         4926 :   tree new_temp;
    1188              : 
    1189              :   /* We abuse this function to push sth to a SSA name with initial 'val'.  */
    1190         4926 :   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    1191              :     {
    1192         1335 :       gcc_assert (VECTOR_TYPE_P (type));
    1193         1335 :       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
    1194              :         {
    1195              :           /* Scalar boolean value should be transformed into
    1196              :              all zeros or all ones value before building a vector.  */
    1197           11 :           if (VECTOR_BOOLEAN_TYPE_P (type))
    1198              :             {
    1199            3 :               tree true_val = build_all_ones_cst (TREE_TYPE (type));
    1200            3 :               tree false_val = build_zero_cst (TREE_TYPE (type));
    1201              : 
    1202            3 :               if (CONSTANT_CLASS_P (val))
    1203            0 :                 val = integer_zerop (val) ? false_val : true_val;
    1204              :               else
    1205              :                 {
    1206            3 :                   new_temp = make_ssa_name (TREE_TYPE (type));
    1207            3 :                   init_stmt = gimple_build_assign (new_temp, COND_EXPR,
    1208              :                                                    val, true_val, false_val);
    1209            3 :                   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
    1210            3 :                   val = new_temp;
    1211              :                 }
    1212              :             }
    1213              :           else
    1214              :             {
    1215            8 :               gimple_seq stmts = NULL;
    1216            8 :               if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
    1217            8 :                 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
    1218            8 :                                     TREE_TYPE (type), val);
    1219              :               else
    1220              :                 /* ???  Condition vectorization expects us to do
    1221              :                    promotion of invariant/external defs.  */
    1222            0 :                 val = gimple_convert (&stmts, TREE_TYPE (type), val);
    1223           16 :               for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
    1224           16 :                    !gsi_end_p (gsi2); )
    1225              :                 {
    1226            8 :                   init_stmt = gsi_stmt (gsi2);
    1227            8 :                   gsi_remove (&gsi2, false);
    1228            8 :                   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
    1229              :                 }
    1230              :             }
    1231              :         }
    1232         1335 :       val = build_vector_from_val (type, val);
    1233              :     }
    1234              : 
    1235         4926 :   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
    1236         4926 :   init_stmt = gimple_build_assign (new_temp, val);
    1237         4926 :   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
    1238         4926 :   return new_temp;
    1239              : }
    1240              : 
    1241              : 
    1242              : /* Get vectorized definitions for those of OP0 to OP3 that are nonnull.  */
    1243              : 
    1244              : void
    1245       187560 : vect_get_vec_defs (vec_info *, slp_tree slp_node,
    1246              :                    tree op0, vec<tree> *vec_oprnds0,
    1247              :                    tree op1, vec<tree> *vec_oprnds1,
    1248              :                    tree op2, vec<tree> *vec_oprnds2,
    1249              :                    tree op3, vec<tree> *vec_oprnds3)
    1250              : {
    1251       187560 :   if (op0)
    1252       185910 :     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
    1253       187560 :   if (op1)
    1254       138090 :     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
    1255       187560 :   if (op2)
    1256         9265 :     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
    1257       187560 :   if (op3)
    1258            0 :     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    1259       187560 : }
    1260              : 
    1261              : /* Helper function called by vect_finish_replace_stmt and
    1262              :    vect_finish_stmt_generation.  Set the location of VEC_STMT and, if it
    1263              :    could throw, add it to the same EH region as STMT_INFO's statement.  */
    1264              : 
    1265              : static void
    1266      1435319 : vect_finish_stmt_generation_1 (vec_info *,
    1267              :                                stmt_vec_info stmt_info, gimple *vec_stmt)
    1268              : {
    1269      1435319 :   if (dump_enabled_p ())
    1270       148123 :     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
    1271              : 
    1272      1435319 :   if (stmt_info)
    1273              :     {
    1274      1404106 :       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
    1275              : 
    1276              :       /* While EH edges will generally prevent vectorization, stmt might
    1277              :          e.g. be in a must-not-throw region.  Ensure newly created stmts
    1278              :          that could throw are part of the same region.  */
    1279      1404106 :       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
    1280      1404106 :       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    1281           48 :         add_stmt_to_eh_lp (vec_stmt, lp_nr);
    1282              :     }
    1283              :   else
    1284        31213 :     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
    1285      1435319 : }
    1286              : 
    1287              : /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
    1288              :    which sets the same scalar result as STMT_INFO did.  VEC_STMT gets its
    1289              :    location from STMT_INFO's statement; nothing is returned.  */
    1290              : 
    1291              : void
    1292          895 : vect_finish_replace_stmt (vec_info *vinfo,
    1293              :                           stmt_vec_info stmt_info, gimple *vec_stmt)
    1294              : {
    1295          895 :   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
    1296          895 :   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
    1297              : 
    1298          895 :   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
    1299          895 :   gsi_replace (&gsi, vec_stmt, true);
    1300              : 
    1301          895 :   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
    1302          895 : }
    1303              : 
    1304              : /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
    1305              :    before *GSI.  Virtual operands of VEC_STMT are set up when possible.  */
    1306              : 
    1307              : void
    1308      1434424 : vect_finish_stmt_generation (vec_info *vinfo,
    1309              :                              stmt_vec_info stmt_info, gimple *vec_stmt,
    1310              :                              gimple_stmt_iterator *gsi)
    1311              : {
    1312      1434424 :   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
    1313              : 
    1314      1434424 :   if (!gsi_end_p (*gsi)
    1315      2867575 :       && gimple_has_mem_ops (vec_stmt))
    1316              :     {
    1317      1433151 :       gimple *at_stmt = gsi_stmt (*gsi);
    1318      1433151 :       tree vuse = gimple_vuse (at_stmt);
    1319      1426723 :       if (vuse && TREE_CODE (vuse) == SSA_NAME)
    1320              :         {
    1321      1285298 :           tree vdef = gimple_vdef (at_stmt);
    1322      1285298 :           gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
    1323      1285298 :           gimple_set_modified (vec_stmt, true);
    1324              :           /* If we have an SSA vuse and insert a store, update virtual
    1325              :              SSA form to avoid triggering the renamer.  Do so only
    1326              :              if we can easily see all uses - which is what almost always
    1327              :              happens with the way vectorized stmts are inserted.  */
    1328       752688 :           if ((vdef && TREE_CODE (vdef) == SSA_NAME)
    1329      2037950 :               && ((is_gimple_assign (vec_stmt)
    1330       751781 :                    && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
    1331        64960 :                   || (is_gimple_call (vec_stmt)
    1332          871 :                       && (!(gimple_call_flags (vec_stmt)
    1333          871 :                             & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
    1334            1 :                           || (gimple_call_lhs (vec_stmt)
    1335            1 :                               && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
    1336              :             {
    1337       688562 :               tree new_vdef = copy_ssa_name (vuse, vec_stmt);
    1338       688562 :               gimple_set_vdef (vec_stmt, new_vdef);
    1339       688562 :               SET_USE (gimple_vuse_op (at_stmt), new_vdef);
    1340              :             }
    1341              :         }
    1342              :     }
    1343      1434424 :   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
    1344      1434424 :   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
    1345      1434424 : }
    1346              : 
    1347              : /* We want to vectorize a call to combined function CFN with function
    1348              :    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
    1349              :    as the types of all inputs.  Check whether this is possible using
    1350              :    an internal function, returning its code if so or IFN_LAST if not.  */
    1351              : 
    1352              : static internal_fn
    1353        16255 : vectorizable_internal_function (combined_fn cfn, tree fndecl,
    1354              :                                 tree vectype_out, tree vectype_in)
    1355              : {
    1356        16255 :   internal_fn ifn;
    1357        16255 :   if (internal_fn_p (cfn))
    1358        13813 :     ifn = as_internal_fn (cfn);
    1359              :   else
    1360         2442 :     ifn = associated_internal_fn (fndecl);
    1361        16255 :   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    1362              :     {
    1363        12863 :       const direct_internal_fn_info &info = direct_internal_fn (ifn);
    1364        12863 :       if (info.vectorizable)
    1365              :         {
    1366        12863 :           bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
    1367        12863 :           tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
    1368        12863 :           tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
    1369              : 
    1370              :           /* The type sizes of vectype_in and vectype_out must be exactly
    1371              :              the same when vectype_out isn't participating in the optab
    1372              :              query.  There is no restriction on the type sizes when
    1373              :              vectype_out is part of the optab query.  */
    1374        12863 :           if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
    1375              :             return IFN_LAST;
    1376              : 
    1377        12843 :           if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
    1378              :                                               OPTIMIZE_FOR_SPEED))
    1379              :             return ifn;
    1380              :         }
    1381              :     }
    1382              :   return IFN_LAST;
    1383              : }
    1384              : 
    1385              : 
    1386              : static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
    1387              :                                   gimple_stmt_iterator *);
    1388              : 
    1389              : /* Check whether a load or store statement in the loop described by
    1390              :    LOOP_VINFO is possible in a loop using partial vectors.  This is
    1391              :    testing whether the vectorizer pass has the appropriate support,
    1392              :    as well as whether the target does.
    1393              : 
    1394              :    VLS_TYPE says whether the statement is a load or store and VECTYPE
    1395              :    is the type of the vector being loaded or stored.  SLP_NODE is the SLP
    1396              :    node that contains the statement, or null if none.  LS->memory_access_type
    1397              :    says how the load or store is going to be implemented and GROUP_SIZE
    1398              :    is the number of load or store statements in the containing group.
    1399              :    If the access is a gather load or scatter store, LS also supplies any
    1400              :    offset vector type and scale to use.  If the load or store is conditional,
    1401              :    MASK_NODE is the SLP node for the condition under which it occurs.
    1402              : 
    1403              :    Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
    1404              :    vectors is not supported, otherwise record the required rgroup control
    1405              :    types.
    1406              : 
    1407              :    If partial vectors can be used and ELSVALS is nonzero the supported
    1408              :    else values will be added to the vector ELSVALS points to.  */
    1409              : 
    1410              : static void
    1411       288822 : check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
    1412              :                                       slp_tree slp_node,
    1413              :                                       vec_load_store_type vls_type,
    1414              :                                       int group_size,
    1415              :                                       vect_load_store_data *ls,
    1416              :                                       slp_tree mask_node,
    1417              :                                       vec<int> *elsvals = nullptr)
    1418              : {
    1419       288822 :   vect_memory_access_type memory_access_type = ls->memory_access_type;
    1420              : 
    1421              :   /* Invariant loads need no special support.  */
    1422       288822 :   if (memory_access_type == VMAT_INVARIANT)
    1423        29099 :     return;
    1424              : 
    1425              :   /* Figure out whether the mask is uniform.  scalar_mask is used to
    1426              :      populate the scalar_cond_masked_set.  */
    1427       287617 :   tree scalar_mask = NULL_TREE;
    1428       287617 :   if (mask_node)
    1429         4968 :     for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i)
    1430              :       {
    1431         2535 :         tree def = vect_get_slp_scalar_def (mask_node, i);
    1432         2535 :         if (!def
    1433         2535 :             || (scalar_mask && def != scalar_mask))
    1434              :           {
    1435              :             scalar_mask = NULL;
    1436              :             break;
    1437              :           }
    1438              :         else
    1439         2504 :           scalar_mask = def;
    1440              :       }
    1441              : 
    1442       287617 :   unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node);
    1443       287617 :   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
    1444       287617 :   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
    1445       287617 :   machine_mode vecmode = TYPE_MODE (vectype);
    1446       287617 :   bool is_load = (vls_type == VLS_LOAD);
    1447       287617 :   if (memory_access_type == VMAT_LOAD_STORE_LANES)
    1448              :     {
    1449            0 :       nvectors /= group_size;
    1450            0 :       internal_fn ifn
    1451            0 :         = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
    1452              :                                                 elsvals)
    1453            0 :                    : vect_store_lanes_supported (vectype, group_size, true));
    1454            0 :       if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
    1455            0 :         vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
    1456            0 :       else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
    1457            0 :         vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
    1458              :                                scalar_mask);
    1459              :       else
    1460              :         {
    1461            0 :           if (dump_enabled_p ())
    1462            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1463              :                              "can't operate on partial vectors because"
    1464              :                              " the target doesn't have an appropriate"
    1465              :                              " load/store-lanes instruction.\n");
    1466            0 :           LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    1467              :         }
    1468            0 :       return;
    1469              :     }
    1470              : 
    1471       287617 :   if (mat_gather_scatter_p (memory_access_type))
    1472              :     {
    1473         1731 :       internal_fn ifn = (is_load
    1474         1731 :                          ? IFN_MASK_GATHER_LOAD
    1475              :                          : IFN_MASK_SCATTER_STORE);
    1476          419 :       internal_fn len_ifn = (is_load
    1477              :                              ? IFN_MASK_LEN_GATHER_LOAD
    1478              :                              : IFN_MASK_LEN_SCATTER_STORE);
    1479         1731 :       stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
    1480         1731 :       tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
    1481         1731 :                           ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
    1482         1731 :                           : ls->strided_offset_vectype);
    1483         1731 :       tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
    1484         1731 :       int scale = SLP_TREE_GS_SCALE (slp_node);
    1485              : 
    1486              :       /* The following "supported" checks just verify what we established in
    1487              :          get_load_store_type and don't try different offset types.
    1488              :          Therefore, off_vectype must be a supported offset type.  In case
    1489              :          we chose a different one use this instead.  */
    1490         1731 :       if (ls->supported_offset_vectype)
    1491            0 :         off_vectype = ls->supported_offset_vectype;
    1492              :       /* Same for scale.  */
    1493         1731 :       if (ls->supported_scale)
    1494            0 :         scale = ls->supported_scale;
    1495              : 
    1496         1731 :       if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
    1497              :                                                   memory_type,
    1498              :                                                   off_vectype, scale,
    1499              :                                                   elsvals))
    1500            0 :         vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
    1501         1731 :       else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
    1502              :                                                        memory_type,
    1503              :                                                        off_vectype, scale,
    1504              :                                                        elsvals)
    1505         1731 :                || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
    1506          566 :         vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
    1507              :                                scalar_mask);
    1508              :       else
    1509              :         {
    1510         1165 :           if (dump_enabled_p ())
    1511           24 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1512              :                              "can't operate on partial vectors because"
    1513              :                              " the target doesn't have an appropriate"
    1514              :                              " gather load or scatter store instruction.\n");
    1515         1165 :           LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    1516              :         }
    1517         1731 :       return;
    1518              :     }
    1519              : 
    1520       285886 :   if (memory_access_type != VMAT_CONTIGUOUS)
    1521              :     {
    1522              :       /* Element X of the data must come from iteration i * VF + X of the
    1523              :          scalar loop.  We need more work to support other mappings.  */
    1524        26163 :       if (dump_enabled_p ())
    1525          730 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1526              :                          "can't operate on partial vectors because an"
    1527              :                          " access isn't contiguous.\n");
    1528        26163 :       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    1529        26163 :       return;
    1530              :     }
    1531              : 
    1532       259723 :   if (!VECTOR_MODE_P (vecmode))
    1533              :     {
    1534            0 :       if (dump_enabled_p ())
    1535            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1536              :                          "can't operate on partial vectors when emulating"
    1537              :                          " vector operations.\n");
    1538            0 :       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    1539            0 :       return;
    1540              :     }
    1541              : 
    1542              :   /* We might load more scalars than we need for permuting SLP loads.
    1543              :      We checked in get_load_store_type that the extra elements
    1544              :      don't leak into a new vector.  */
    1545       349394 :   auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
    1546              :   {
    1547        89671 :     unsigned int nvectors;
    1548       179342 :     if (can_div_away_from_zero_p (size, nunits, &nvectors))
    1549        89671 :       return nvectors;
    1550              :     gcc_unreachable ();
    1551              :   };
    1552              : 
    1553       259723 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    1554       259723 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    1555       259723 :   machine_mode mask_mode;
    1556       259723 :   machine_mode vmode;
    1557       259723 :   bool using_partial_vectors_p = false;
    1558       259723 :   if (get_len_load_store_mode
    1559       259723 :       (vecmode, is_load, nullptr, elsvals).exists (&vmode))
    1560              :     {
    1561            0 :       nvectors = group_memory_nvectors (group_size * vf, nunits);
    1562            0 :       unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
    1563            0 :       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
    1564            0 :       using_partial_vectors_p = true;
    1565              :     }
    1566       349394 :   else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    1567       259723 :            && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
    1568              :                                          elsvals))
    1569              :     {
    1570        89671 :       nvectors = group_memory_nvectors (group_size * vf, nunits);
    1571        89671 :       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
    1572        89671 :       using_partial_vectors_p = true;
    1573              :     }
    1574              : 
    1575        89671 :   if (!using_partial_vectors_p)
    1576              :     {
    1577       170052 :       if (dump_enabled_p ())
    1578        11647 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1579              :                          "can't operate on partial vectors because the"
    1580              :                          " target doesn't have the appropriate partial"
    1581              :                          " vectorization load or store.\n");
    1582       170052 :       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    1583              :     }
    1584              : }
    1585              : 
    1586              : /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
    1587              :    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
    1588              :    that needs to be applied to all loads and stores in a vectorized loop.
    1589              :    Return VEC_MASK unchanged if LOOP_MASK is null or if LOOP_VINFO already
    1590              :    records { VEC_MASK, LOOP_MASK } in its vec_cond_masked_set; otherwise
    1591              :    return a new SSA name holding VEC_MASK & LOOP_MASK.
    1592              :    MASK_TYPE is the type of both masks (asserted below).  If new statements
    1593              :    are needed, insert them before GSI.  */
    1594              : 
    1595              : tree
    1596         1701 : prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
    1597              :                   tree vec_mask, gimple_stmt_iterator *gsi)
    1598              : {
    1599         1701 :   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
    1600         1701 :   if (!loop_mask)
    1601              :     return vec_mask;
    1602              : 
    1603          139 :   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
    1604              :   /* Nothing to do if VEC_MASK is already recorded as masked by LOOP_MASK.  */
    1605          139 :   if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    1606              :     return vec_mask;
    1607              :   /* Otherwise emit VEC_MASK & LOOP_MASK before GSI and return the result.  */
    1608          139 :   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
    1609          139 :   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
    1610              :                                           vec_mask, loop_mask);
    1611              : 
    1612          139 :   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
    1613          139 :   return and_res;
    1614              : }
    1615              : 
    1616              : /* Determine whether we can use a gather load or scatter store to vectorize
    1617              :    strided load or store STMT_INFO by truncating the current offset to a
    1618              :    smaller width.  We need to be able to construct an offset vector:
    1619              : 
    1620              :      { 0, X, X*2, X*3, ... }
    1621              : 
    1622              :    without loss of precision, where X is STMT_INFO's DR_STEP.  A DR_STEP
    1623              :    that is not an INTEGER_CST is rejected.  Scales of 1 and of the element
    1624              :    size are tried in that order.
    1625              :    Return true if this is possible, describing the gather load or scatter
    1626              :    store in GS_INFO.  MASKED_P is true if the load or store is conditional.
    1627              :    If we can use gather/scatter and ELSVALS is nonzero the supported
    1628              :    else values will be stored in the vector ELSVALS points to.  */
    1629              : 
    1630              : static bool
    1631        63792 : vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
    1632              :                                      loop_vec_info loop_vinfo, bool masked_p,
    1633              :                                      gather_scatter_info *gs_info,
    1634              :                                      vec<int> *elsvals)
    1635              : {
    1636        63792 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    1637        63792 :   data_reference *dr = dr_info->dr;
    1638        63792 :   tree step = DR_STEP (dr);
    1639        63792 :   if (TREE_CODE (step) != INTEGER_CST)
    1640              :     {
    1641              :       /* ??? Perhaps we could use range information here?  */
    1642        28604 :       if (dump_enabled_p ())
    1643          229 :         dump_printf_loc (MSG_NOTE, vect_location,
    1644              :                          "cannot truncate variable step.\n");
    1645        28604 :       return false;
    1646              :     }
    1647              : 
    1648              :   /* Get the number of bits in an element (only used in the dump below).  */
    1649        35188 :   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
    1650        35188 :   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
    1651              : 
    1652              :   /* Set COUNT to the upper limit on the number of elements - 1.
    1653              :      Start with the maximum vectorization factor.  */
    1654        35188 :   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
    1655              : 
    1656              :   /* Try lowering COUNT to the number of scalar latch iterations.  */
    1657        35188 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    1658        35188 :   widest_int max_iters;
    1659        35188 :   if (max_loop_iterations (loop, &max_iters)
    1660        69647 :       && max_iters < count)
    1661         2069 :     count = max_iters.to_shwi ();
    1662              : 
    1663              :   /* Try scales of 1 and the element size.  */
    1664        35188 :   unsigned int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
    1665        35188 :   wi::overflow_type overflow = wi::OVF_NONE;
    1666       105564 :   for (int i = 0; i < 2; ++i)
    1667              :     {
    1668        70376 :       unsigned int scale = scales[i];
    1669        70376 :       widest_int factor;
    1670        70376 :       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
    1671            0 :         continue;
    1672              : 
    1673              :       /* Determine the minimum precision of COUNT * STEP / SCALE.  */
    1674        70376 :       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
    1675        70376 :       if (overflow)
    1676            0 :         continue;
    1677        70376 :       signop sign = range >= 0 ? UNSIGNED : SIGNED;
    1678        70376 :       unsigned int min_offset_bits = wi::min_precision (range, sign);
    1679              : 
    1680              :       /* Find the narrowest viable offset type.  */
    1681        70376 :       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
    1682        70376 :       tree offset_type = build_nonstandard_integer_type (offset_bits,
    1683              :                                                          sign == UNSIGNED);
    1684              : 
    1685              :       /* See whether the target supports the operation with an offset
    1686              :          no narrower than OFFSET_TYPE.  */
    1687        70376 :       tree memory_type = TREE_TYPE (DR_REF (dr));
    1688        70376 :       tree tmp_offset_vectype;
    1689        70376 :       int tmp_scale;
    1690        70376 :       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
    1691              :                                      vectype, memory_type, offset_type,
    1692              :                                      scale, &tmp_scale,
    1693              :                                      &gs_info->ifn, &gs_info->offset_vectype,
    1694              :                                      &tmp_offset_vectype, elsvals)
    1695        70376 :           || gs_info->ifn == IFN_LAST)
    1696        70376 :         continue;
    1697              : 
    1698            0 :       gs_info->decl = NULL_TREE;
    1699              :       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
    1700              :          but we don't need to store that here.  */
    1701            0 :       gs_info->base = NULL_TREE;
    1702            0 :       gs_info->alias_ptr = build_int_cst
    1703            0 :         (reference_alias_ptr_type (DR_REF (dr)),
    1704            0 :          get_object_alignment (DR_REF (dr)));
    1705            0 :       gs_info->element_type = TREE_TYPE (vectype);
    1706            0 :       gs_info->offset = fold_convert (offset_type, step);
    1707            0 :       gs_info->scale = scale;
    1708            0 :       gs_info->memory_type = memory_type;
    1709            0 :       return true;
    1710       140752 :     }
    1711              : 
    1712        35188 :   if (overflow && dump_enabled_p ())
    1713            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1714              :                      "truncating gather/scatter offset to %d bits"
    1715              :                      " might change its value.\n", element_bits);
    1716              : 
    1717              :   return false;
    1718        35188 : }
    1719              : 
    1720              : /* Return true if we can use gather/scatter or strided internal functions
    1721              :    to vectorize the access described by DR_INFO, which is a grouped or
    1722              :    strided load or store with multiple lanes and will be implemented by a
    1723              :    type-punned access of a vector with element size that matches the
    1724              :    number of lanes, NELTS.  Return false unless NELTS is a power of two
    1725              :    that does not exceed BITS_PER_UNIT.
    1726              :    MASKED_P is true if load or store is conditional.
    1727              :    When returning true, fill in INFO with the information required to
    1728              :    perform the operation.  The punning type is stored in *PUN_VECTYPE.
    1729              :    If successful and ELSVALS is nonzero the supported
    1730              :    else values will be stored in the vector ELSVALS points to.  */
    1731              : 
    1732              : static bool
    1733         4611 : vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
    1734              :                          loop_vec_info loop_vinfo, bool masked_p,
    1735              :                          unsigned int nelts,
    1736              :                          gather_scatter_info *info, vec<int> *elsvals,
    1737              :                          tree *pun_vectype)
    1738              : {
    1739         4611 :   data_reference *dr = dr_info->dr;
    1740              : 
    1741              :   /* TODO: We can support nelts > BITS_PER_UNIT or non-power-of-two by
    1742              :      multiple gathers/scatter.  */
    1743         8895 :   if (nelts > BITS_PER_UNIT || !pow2p_hwi (nelts))
    1744              :     return false;
    1745              : 
    1746              :   /* Pun the vectype with one of the same size but an element spanning
    1747              :      NELTS elements of VECTYPE.  For example, the punned type of a V16QI
    1748              :      with NELTS = 4 would be V4SI.  PIECES is the element count of the
    1749              :      punned vector; give up if it is zero or cannot be computed.  */
    1750         3973 :   tree tmp;
    1751         3973 :   unsigned int pieces;
    1752         3973 :   if (!can_div_trunc_p (TYPE_VECTOR_SUBPARTS (vectype), nelts, &pieces)
    1753         3973 :       || !pieces)
    1754          352 :     return false;
    1755              : 
    1756         3621 :   *pun_vectype = vector_vector_composition_type (vectype, pieces, &tmp, true);
    1757              : 
    1758         3621 :   if (!*pun_vectype || !VECTOR_TYPE_P (*pun_vectype))
    1759              :     return false;
    1760              : 
    1761         3245 :   internal_fn ifn;
    1762         3245 :   tree offset_vectype = *pun_vectype;
    1763              : 
    1764         2171 :   internal_fn strided_ifn = DR_IS_READ (dr)
    1765         3245 :     ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
    1766              : 
    1767              :   /* Check if we have a gather/scatter with the new type.  We're just trying
    1768              :      with the type itself as offset for now.  If not, check if we have a
    1769              :      strided load/store.  These have fewer constraints (for example no offset
    1770              :      type must exist) so it is possible that even though a gather/scatter is
    1771              :      not available we still have a strided load/store.  */
    1772         3245 :   bool ok = false;
    1773         3245 :   tree tmp_vectype;
    1774         3245 :   int tmp_scale;
    1775         3245 :   if (vect_gather_scatter_fn_p
    1776         3245 :       (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
    1777         3245 :        TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
    1778              :        &offset_vectype, &tmp_vectype, elsvals))
    1779              :     ok = true;
    1780         3245 :   else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
    1781              :                                             elsvals))
    1782              :     {
    1783              :       /* Use gather/scatter IFNs, vect_get_strided_load_store_ops
    1784              :          will switch back to the strided variants.  */
    1785            0 :       ifn = DR_IS_READ (dr) ? IFN_MASK_LEN_GATHER_LOAD :
    1786              :         IFN_MASK_LEN_SCATTER_STORE;
    1787            0 :       ok = true;
    1788              :     }
    1789              : 
    1790            0 :   if (ok)
    1791              :     {
    1792            0 :       info->ifn = ifn;
    1793            0 :       info->decl = NULL_TREE;
    1794            0 :       info->base = dr->ref;
    1795            0 :       info->alias_ptr = build_int_cst
    1796            0 :         (reference_alias_ptr_type (DR_REF (dr)),
    1797            0 :          get_object_alignment (DR_REF (dr)));
    1798            0 :       info->element_type = TREE_TYPE (*pun_vectype);
    1799            0 :       info->offset_vectype = offset_vectype;
    1800              :       /* No need to set the offset, vect_get_strided_load_store_ops
    1801              :          will do that.  */
    1802            0 :       info->scale = 1;
    1803            0 :       info->memory_type = TREE_TYPE (DR_REF (dr));
    1804            0 :       return true;
    1805              :     }
    1806              : 
    1807              :   return false;
    1808              : }
    1809              : 
    1810              : 
    1811              : /* Return true if we can use gather/scatter internal functions to
    1812              :    vectorize STMT_INFO, which is a grouped or strided load or store.
    1813              :    MASKED_P is true if load or store is conditional.  When returning
    1814              :    true, fill in GS_INFO with the information required to perform the
    1815              :    operation.  Unless SINGLE_ELEMENT_P is true, the target must also
    1816              :    prefer gather/scatter for VECTYPE's mode, the scale and GROUP_SIZE.
    1817              :    If we can use gather/scatter and ELSVALS is nonzero the supported
    1818              :    else values will be stored in the vector ELSVALS points to.  */
    1819              : 
    1820              : static bool
    1821        63792 : vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
    1822              :                                     loop_vec_info loop_vinfo, bool masked_p,
    1823              :                                     gather_scatter_info *gs_info,
    1824              :                                     vec<int> *elsvals,
    1825              :                                     unsigned int group_size,
    1826              :                                     bool single_element_p)
    1827              : {
    1828        63792 :   if (!vect_check_gather_scatter (stmt_info, vectype,
    1829              :                                   loop_vinfo, gs_info, elsvals)
    1830        63792 :       || gs_info->ifn == IFN_LAST)
    1831              :     {
    1832        63792 :       if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
    1833              :                                                 masked_p, gs_info, elsvals))
    1834              :         return false;
    1835              :     }
    1836              :   /* For multi-element groups let the target veto gather/scatter.  */
    1837            0 :   if (!single_element_p
    1838            0 :       && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
    1839              :                                                    gs_info->scale,
    1840              :                                                    group_size))
    1841              :     return false;
    1842              : 
    1843            0 :   if (dump_enabled_p ())
    1844            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1845              :                      "using gather/scatter for strided/grouped access,"
    1846              :                      " scale = %d\n", gs_info->scale);
    1847              : 
    1848              :   return true;
    1849              : }
    1850              : 
    1851              : /* STMT_INFO is a non-strided load or store, meaning that it accesses
    1852              :    elements with a known constant step, taken from vect_dr_behavior for
    1853              :    VINFO.  Return -1 if the step is negative, 0 if zero, 1 if positive.  */
    1854              : 
    1855              : int
    1856      1471104 : compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
    1857              : {
    1858      1471104 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    1859      1471104 :   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
    1860      1471104 :                                size_zero_node);
    1861              : }
    1862              : 
    1863              : /* If the target supports a permute mask that reverses the elements in
    1864              :    a vector of type VECTYPE, return that mask, otherwise return NULL_TREE.  */
    1865              : 
    1866              : tree
    1867         9164 : perm_mask_for_reverse (tree vectype)
    1868              : {
    1869         9164 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    1870              : 
    1871              :   /* The encoding has a single stepped pattern: NUNITS-1, NUNITS-2, ...  */
    1872         9164 :   vec_perm_builder sel (nunits, 1, 3);
    1873        36656 :   for (int i = 0; i < 3; ++i)
    1874        27492 :     sel.quick_push (nunits - 1 - i);
    1875              : 
    1876         9164 :   vec_perm_indices indices (sel, 1, nunits);
    1877         9164 :   if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
    1878              :                              indices))
    1879              :     return NULL_TREE;
    1880         8012 :   return vect_gen_perm_mask_checked (vectype, indices);
    1881         9164 : }
    1882              : 
    1883              : /* A subroutine of get_load_store_type, with a subset of the same
    1884              :    arguments.  Handle the case where STMT_INFO is a load or store that
    1885              :    accesses consecutive elements with a negative step.  Sets *POFFSET
    1886              :    to the offset to be applied to the DR for the first access.  */
    1887              : 
    1888              : static vect_memory_access_type
    1889        12150 : get_negative_load_store_type (vec_info *vinfo,
    1890              :                               stmt_vec_info stmt_info, tree vectype,
    1891              :                               vec_load_store_type vls_type,
    1892              :                               unsigned int ncopies, poly_int64 *poffset)
    1893              : {
    1894        12150 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    1895        12150 :   dr_alignment_support alignment_support_scheme;
    1896              :   /* Multiple copies are not handled; this exit leaves *POFFSET untouched.  */
    1897        12150 :   if (ncopies > 1)
    1898              :     {
    1899            0 :       if (dump_enabled_p ())
    1900            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1901              :                          "multiple types with negative step.\n");
    1902            0 :       return VMAT_ELEMENTWISE;
    1903              :     }
    1904              : 
    1905              :   /* For backward running DRs the first access in vectype actually is
    1906              :      N-1 elements before the address of the DR.  */
    1907        12150 :   *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    1908        12150 :               * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    1909              :   /* Fall back to elementwise access, with *POFFSET reset, if unsupported.  */
    1910        12150 :   int misalignment = dr_misalignment (dr_info, vectype, *poffset);
    1911        12150 :   alignment_support_scheme
    1912        12150 :     = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
    1913        12150 :   if (alignment_support_scheme != dr_aligned
    1914        12150 :       && alignment_support_scheme != dr_unaligned_supported)
    1915              :     {
    1916         4374 :       if (dump_enabled_p ())
    1917            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1918              :                          "negative step but alignment required.\n");
    1919         4374 :       *poffset = 0;
    1920         4374 :       return VMAT_ELEMENTWISE;
    1921              :     }
    1922              :   /* Storing an invariant value needs no lane reversal.  */
    1923         7776 :   if (vls_type == VLS_STORE_INVARIANT)
    1924              :     {
    1925         1197 :       if (dump_enabled_p ())
    1926           21 :         dump_printf_loc (MSG_NOTE, vect_location,
    1927              :                          "negative step with invariant source;"
    1928              :                          " no permute needed.\n");
    1929         1197 :       return VMAT_CONTIGUOUS_DOWN;
    1930              :     }
    1931              :   /* Everything else requires a reversing permute for VECTYPE.  */
    1932         6579 :   if (!perm_mask_for_reverse (vectype))
    1933              :     {
    1934         1152 :       if (dump_enabled_p ())
    1935           52 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1936              :                          "negative step and reversing not supported.\n");
    1937         1152 :       *poffset = 0;
    1938         1152 :       return VMAT_ELEMENTWISE;
    1939              :     }
    1940              : 
    1941              :   return VMAT_CONTIGUOUS_REVERSE;
    1942              : }
    1943              : 
    1944              : /* STMT_INFO is either a masked or unconditional store.  Return the value
    1945              :    being stored (gassign rhs1 or the gcall's stored-value argument).  */
    1946              : 
    1947              : tree
    1948            0 : vect_get_store_rhs (stmt_vec_info stmt_info)
    1949              : {
    1950            0 :   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    1951              :     {
    1952            0 :       gcc_assert (gimple_assign_single_p (assign));
    1953            0 :       return gimple_assign_rhs1 (assign);
    1954              :     }
    1955            0 :   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    1956              :     {
    1957            0 :       internal_fn ifn = gimple_call_internal_fn (call);
    1958            0 :       int index = internal_fn_stored_value_index (ifn);
    1959            0 :       gcc_assert (index >= 0);
    1960            0 :       return gimple_call_arg (call, index);
    1961              :     }
    1962            0 :   gcc_unreachable ();
    1963              : }
    1964              : 
    1965              : /* Function VECTOR_VECTOR_COMPOSITION_TYPE
    1966              : 
    1967              :    This function returns a vector type which can be composed of NELTS pieces,
    1968              :    whose type is recorded in *PTYPE.  VTYPE must be a vector type and has the
    1969              :    same vector size as the returned vector.  It first checks whether the
    1970              :    target supports construction from piece-sized vector modes and, failing
    1971              :    that, from a piece-sized integer scalar mode.  It returns NULL_TREE if no
    1972              :    such composition is available.  If the caller only wants scalar pieces,
    1973              :    e.g. because PTYPE is to be a gather/scatter element type,
    1974              :    SCALAR_PTYPE_ONLY must be true.
    1975              : 
    1976              :    For example, for (vtype=V16QI, nelts=4), we can probably get:
    1977              :      - V16QI with PTYPE V4QI.
    1978              :      - V4SI with PTYPE SI.
    1979              :      - NULL_TREE.  */
    1980              : 
    1981              : static tree
    1982        14203 : vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype,
    1983              :                                 bool scalar_ptype_only)
    1984              : {
    1985        14203 :   gcc_assert (VECTOR_TYPE_P (vtype));
    1986        14203 :   gcc_assert (known_gt (nelts, 0U));
    1987              : 
    1988        14203 :   machine_mode vmode = TYPE_MODE (vtype);
    1989        14203 :   if (!VECTOR_MODE_P (vmode))
    1990              :     return NULL_TREE;
    1991              : 
    1992              :   /* When we are asked to compose the vector from its components let
    1993              :      that happen directly.  */
    1994        14203 :   if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
    1995              :     {
    1996         5990 :       *ptype = TREE_TYPE (vtype);
    1997         5990 :       return vtype;
    1998              :     }
    1999              : 
    2000        16426 :   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
    2001         8213 :   unsigned int pbsize;
    2002         8213 :   if (constant_multiple_p (vbsize, nelts, &pbsize))
    2003              :     {
    2004              :       /* First check if vec_init optab supports construction from
    2005              :          vector pieces directly.  */
    2006         8213 :       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
    2007        16426 :       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
    2008         8213 :       machine_mode rmode;
    2009         8213 :       if (!scalar_ptype_only
    2010         4592 :           && related_vector_mode (vmode, elmode, inelts).exists (&rmode)
    2011        12343 :           && (convert_optab_handler (vec_init_optab, vmode, rmode)
    2012              :               != CODE_FOR_nothing))
    2013              :         {
    2014         3490 :           *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
    2015         3490 :           return vtype;
    2016              :         }
    2017              : 
    2018              :       /* Otherwise check if an integer mode of the piece size exists and,
    2019              :          unless SCALAR_PTYPE_ONLY, if vec_init can construct from it.  */
    2020         4723 :       if (int_mode_for_size (pbsize, 0).exists (&elmode)
    2021         4723 :           && related_vector_mode (vmode, elmode, nelts).exists (&rmode))
    2022              :         {
    2023         4309 :           if (scalar_ptype_only
    2024         4309 :               || convert_optab_handler (vec_init_optab, rmode, elmode)
    2025              :               != CODE_FOR_nothing)
    2026              :             {
    2027         4309 :               *ptype = build_nonstandard_integer_type (pbsize, 1);
    2028         4309 :               return build_vector_type (*ptype, nelts);
    2029              :             }
    2030              :         }
    2031              :     }
    2032              : 
    2033              :   return NULL_TREE;
    2034              : }
    2035              : 
    2036              : /* Check if the load permutation of NODE only refers to a consecutive
    2037              :    subset of the group indices where GROUP_SIZE is the size of the
    2038              :    dataref's group.  Return false unless a permutation exists, its length
    2039              :    is a power of two greater than one and that length divides GROUP_SIZE.
    2040              :    Such load permutations can be elided in strided access schemes as
    2041              :    we can "jump over" the gap they leave.  */
    2042              : 
    2043              : bool
    2044        45044 : has_consecutive_load_permutation (slp_tree node, unsigned group_size)
    2045              : {
    2046        45044 :   load_permutation_t perm = SLP_TREE_LOAD_PERMUTATION (node);
    2047        45044 :   if (!perm.exists ()
    2048         2164 :       || perm.length () <= 1
    2049          496 :       || !pow2p_hwi (perm.length ())
    2050        45524 :       || group_size % perm.length ())
    2051              :     return false;
    2052              : 
    2053          433 :   return vect_load_perm_consecutive_p (node);
    2054              : }
    2055              : 
    2056              : 
    2057              : /* Analyze load or store SLP_NODE of type VLS_TYPE.  Return true
    2058              :    if there is a memory access type that the vectorized form can use,
    2059              :    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
    2060              :    or scatters, fill in GS_INFO accordingly.  In addition
    2061              :    *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
    2062              :    the target does not support the alignment scheme.  *MISALIGNMENT
    2063              :    is set according to the alignment of the access (including
    2064              :    DR_MISALIGNMENT_UNKNOWN when it is unknown).
    2065              : 
    2066              :    MASKED_P is true if the statement is conditional on a vectorized mask.
    2067              :    VECTYPE is the vector type that the vectorized statements will use.
    2068              : 
    2069              :    If ELSVALS is nonzero the supported else values will be stored in the
    2070              :    vector ELSVALS points to.  */
    2071              : 
    2072              : static bool
    2073      1356520 : get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
    2074              :                      tree vectype, slp_tree slp_node,
    2075              :                      bool masked_p, vec_load_store_type vls_type,
    2076              :                      vect_load_store_data *ls)
    2077              : {
    2078      1356520 :   vect_memory_access_type *memory_access_type = &ls->memory_access_type;
    2079      1356520 :   poly_int64 *poffset = &ls->poffset;
    2080      1356520 :   dr_alignment_support *alignment_support_scheme
    2081              :     = &ls->alignment_support_scheme;
    2082      1356520 :   int *misalignment = &ls->misalignment;
    2083      1356520 :   internal_fn *lanes_ifn = &ls->lanes_ifn;
    2084      1356520 :   vec<int> *elsvals = &ls->elsvals;
    2085      1356520 :   tree *ls_type = &ls->ls_type;
    2086      1356520 :   bool *slp_perm = &ls->slp_perm;
    2087      1356520 :   unsigned *n_perms = &ls->n_perms;
    2088      1356520 :   unsigned *n_loads = &ls->n_loads;
    2089      1356520 :   tree *supported_offset_vectype = &ls->supported_offset_vectype;
    2090      1356520 :   int *supported_scale = &ls->supported_scale;
    2091      1356520 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    2092      1356520 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    2093      1356520 :   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
    2094      1356520 :   stmt_vec_info first_stmt_info;
    2095      1356520 :   unsigned int group_size;
    2096      1356520 :   unsigned HOST_WIDE_INT gap;
    2097      1356520 :   bool single_element_p;
    2098      1356520 :   poly_int64 neg_ldst_offset = 0;
    2099              : 
    2100      1356520 :   *misalignment = DR_MISALIGNMENT_UNKNOWN;
    2101      1356520 :   *poffset = 0;
    2102      1356520 :   *ls_type = NULL_TREE;
    2103      1356520 :   *slp_perm = false;
    2104      1356520 :   *n_perms = -1U;
    2105      1356520 :   *n_loads = -1U;
    2106      1356520 :   ls->subchain_p = false;
    2107              : 
    2108      1356520 :   bool perm_ok = true;
    2109      1356520 :   poly_int64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
    2110              : 
    2111      1356520 :   if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    2112        71522 :     perm_ok = vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
    2113        71522 :                                             vf, true, n_perms, n_loads);
    2114              : 
    2115      1356520 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2116              :     {
    2117       870031 :       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    2118       870031 :       group_size = DR_GROUP_SIZE (first_stmt_info);
    2119       870031 :       gap = DR_GROUP_GAP (first_stmt_info);
    2120       870031 :       single_element_p = (stmt_info == first_stmt_info
    2121       870031 :                           && !DR_GROUP_NEXT_ELEMENT (stmt_info));
    2122              :     }
    2123              :   else
    2124              :     {
    2125              :       first_stmt_info = stmt_info;
    2126              :       group_size = 1;
    2127              :       gap = 0;
    2128              :       single_element_p = true;
    2129              :     }
    2130      1356520 :   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
    2131              : 
    2132              :   /* True if the vectorized statements would access beyond the last
    2133              :      statement in the group.  */
    2134      1356520 :   bool overrun_p = false;
    2135              : 
    2136              :   /* True if we can cope with such overrun by peeling for gaps, so that
    2137              :      there is at least one final scalar iteration after the vector loop.  */
    2138      2713040 :   bool can_overrun_p = (!masked_p
    2139      1356520 :                         && vls_type == VLS_LOAD
    2140       538709 :                         && loop_vinfo
    2141      1767877 :                         && !loop->inner);
    2142              : 
    2143              :   /* There can only be a gap at the end of the group if the stride is
    2144              :      known at compile time.  */
    2145      1356520 :   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
    2146              : 
    2147              :   /* For SLP vectorization we directly vectorize a subchain
    2148              :      without permutation.  */
    2149      1356520 :   if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    2150      1284998 :     first_dr_info = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
    2151              : 
    2152      1356520 :   if (STMT_VINFO_STRIDED_P (first_stmt_info))
    2153              :     {
    2154              :       /* Try to use consecutive accesses of as many elements as possible,
    2155              :          separated by the stride, until we have a complete vector.
    2156              :          Fall back to scalar accesses if that isn't possible.  */
    2157        45044 :       *memory_access_type = VMAT_STRIDED_SLP;
    2158              : 
    2159              :       /* If the load permutation is consecutive we can reduce the group to
    2160              :          the elements the permutation accesses.  Then we release the
    2161              :          permutation.  */
    2162        45044 :       if (has_consecutive_load_permutation (slp_node, group_size))
    2163              :         {
    2164           32 :           ls->subchain_p = true;
    2165           32 :           group_size = SLP_TREE_LANES (slp_node);
    2166           32 :           SLP_TREE_LOAD_PERMUTATION (slp_node).release ();
    2167              :         }
    2168              :     }
    2169      1311476 :   else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    2170              :     {
    2171        10828 :       slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
    2172        10828 :       tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
    2173        10828 :       int scale = SLP_TREE_GS_SCALE (slp_node);
    2174        10828 :       tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
    2175        10828 :       tree tem;
    2176        10828 :       if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
    2177              :                                     masked_p, vectype, memory_type,
    2178              :                                     offset_vectype, scale, supported_scale,
    2179              :                                     &ls->gs.ifn, &tem,
    2180              :                                     supported_offset_vectype, elsvals))
    2181              :         {
    2182            0 :           if (dump_enabled_p ())
    2183              :             {
    2184            0 :               dump_printf_loc (MSG_NOTE, vect_location,
    2185              :                                "gather/scatter with required "
    2186              :                                "offset type "
    2187              :                                "%T and offset scale %d.\n",
    2188              :                                offset_vectype, scale);
    2189            0 :               if (*supported_offset_vectype)
    2190            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    2191              :                                  " target supports offset type %T.\n",
    2192              :                                  *supported_offset_vectype);
    2193            0 :               if (*supported_scale)
    2194            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    2195              :                                  " target supports offset scale %d.\n",
    2196              :                                  *supported_scale);
    2197              :             }
    2198            0 :           *memory_access_type = VMAT_GATHER_SCATTER_IFN;
    2199              :         }
    2200        10828 :       else if (vls_type == VLS_LOAD
    2201        10828 :                ? (targetm.vectorize.builtin_gather
    2202         9235 :                   && (ls->gs.decl
    2203         9235 :                         = targetm.vectorize.builtin_gather (vectype,
    2204         9235 :                                                             TREE_TYPE
    2205              :                                                               (offset_vectype),
    2206              :                                                             scale)))
    2207         1593 :                : (targetm.vectorize.builtin_scatter
    2208         1593 :                   && (ls->gs.decl
    2209         1593 :                         = targetm.vectorize.builtin_scatter (vectype,
    2210         1593 :                                                              TREE_TYPE
    2211              :                                                                (offset_vectype),
    2212              :                                                              scale))))
    2213          574 :         *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
    2214              :       else
    2215              :         {
    2216              :           /* GATHER_SCATTER_EMULATED_P.  */
    2217        10254 :           if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
    2218        10254 :               || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
    2219        10254 :               || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
    2220        10254 :               || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
    2221        10254 :                                        TYPE_VECTOR_SUBPARTS (vectype)))
    2222              :             {
    2223         2732 :               if (dump_enabled_p ())
    2224          466 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2225              :                                  "unsupported vector types for emulated "
    2226              :                                  "gather.\n");
    2227         2732 :               return false;
    2228              :             }
    2229         7522 :           *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
    2230              :         }
    2231              :     }
    2232              :   else
    2233              :     {
    2234      1300648 :       int cmp = compare_step_with_zero (vinfo, stmt_info);
    2235      1300648 :       if (cmp < 0)
    2236              :         {
    2237        12328 :           if (single_element_p)
    2238              :             /* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
    2239              :                only correct for single element "interleaving" SLP.  */
    2240        12150 :             *memory_access_type = get_negative_load_store_type
    2241        12150 :                 (vinfo, stmt_info, vectype, vls_type, 1,
    2242              :                  &neg_ldst_offset);
    2243              :           else
    2244              :             /* We can fall back to VMAT_STRIDED_SLP since that does
    2245              :                not care whether the stride between the group instances
    2246              :                is positive or negative.  */
    2247          178 :             *memory_access_type = VMAT_STRIDED_SLP;
    2248              :         }
    2249      1288320 :       else if (cmp == 0 && loop_vinfo)
    2250              :         {
    2251         3351 :           gcc_assert (vls_type == VLS_LOAD);
    2252         3351 :           *memory_access_type = VMAT_INVARIANT;
    2253              :         }
    2254              :       /* Try using LOAD/STORE_LANES.  */
    2255      1284969 :       else if (slp_node->ldst_lanes
    2256      1284969 :                && (*lanes_ifn
    2257            0 :                    = (vls_type == VLS_LOAD
    2258            0 :                       ? vect_load_lanes_supported (vectype, group_size,
    2259              :                                                    masked_p, elsvals)
    2260            0 :                       : vect_store_lanes_supported (vectype, group_size,
    2261              :                                                     masked_p))) != IFN_LAST)
    2262            0 :         *memory_access_type = VMAT_LOAD_STORE_LANES;
    2263      1284969 :       else if (!loop_vinfo && slp_node->avoid_stlf_fail)
    2264              :         {
    2265           70 :           *memory_access_type = VMAT_ELEMENTWISE;
    2266           70 :           if (dump_enabled_p ())
    2267            2 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2268              :                              "using element-wise load to avoid disrupting "
    2269              :                              "cross iteration store-to-load forwarding\n");
    2270              :         }
    2271              :       else
    2272      1284899 :         *memory_access_type = VMAT_CONTIGUOUS;
    2273              : 
    2274              :       /* If this is single-element interleaving with an element
    2275              :          distance that leaves unused vector loads around fall back
    2276              :          to elementwise access if possible - we otherwise at least
    2277              :          create very sub-optimal code in that case (and
    2278              :          blow up memory, see PR65518).  */
    2279      1300648 :       if (loop_vinfo
    2280      1300648 :           && single_element_p
    2281       467076 :           && (*memory_access_type == VMAT_CONTIGUOUS
    2282        15501 :               || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    2283      1767724 :           && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
    2284              :         {
    2285        17826 :           *memory_access_type = VMAT_ELEMENTWISE;
    2286        17826 :           if (dump_enabled_p ())
    2287          198 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2288              :                              "single-element interleaving not supported "
    2289              :                              "for not adjacent vector loads, using "
    2290              :                              "elementwise access\n");
    2291              :         }
    2292              : 
    2293              :       /* Also fall back to elementwise access in case we did not lower a
    2294              :          permutation and cannot code generate it.  */
    2295      1300648 :       if (loop_vinfo
    2296       521510 :           && *memory_access_type != VMAT_ELEMENTWISE
    2297       498158 :           && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
    2298      1329066 :           && !perm_ok)
    2299              :         {
    2300         2055 :           *memory_access_type = VMAT_ELEMENTWISE;
    2301         2055 :           if (dump_enabled_p ())
    2302          246 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2303              :                              "permutation not supported, using elementwise "
    2304              :                              "access\n");
    2305              :         }
    2306              : 
    2307       521510 :       overrun_p = (loop_vinfo && gap != 0
    2308      1343507 :                    && *memory_access_type != VMAT_ELEMENTWISE);
    2309      1300648 :       if (overrun_p && vls_type != VLS_LOAD)
    2310              :         {
    2311            0 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2312              :                            "Grouped store with gaps requires"
    2313              :                            " non-consecutive accesses\n");
    2314            9 :           return false;
    2315              :         }
    2316              : 
    2317      1300648 :       unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (first_dr_info);
    2318      1300648 :       poly_int64 off = 0;
    2319      1300648 :       if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    2320         5268 :         off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
    2321              : 
    2322              :       /* An overrun is fine if the trailing elements are smaller
    2323              :          than the alignment boundary B.  Every vector access will
    2324              :          be a multiple of B and so we are guaranteed to access a
    2325              :          non-gap element in the same B-sized block.  */
    2326      1300648 :       if (overrun_p
    2327      1300648 :           && gap < (vect_known_alignment_in_bytes (first_dr_info,
    2328        22893 :                                                    vectype, off) / dr_size))
    2329              :         overrun_p = false;
    2330              : 
    2331              :       /* When we have a contiguous access across loop iterations
    2332              :          but the access in the loop doesn't cover the full vector
    2333              :          we can end up with no gap recorded but still excess
    2334              :          elements accessed, see PR103116.  Make sure we peel for
    2335              :          gaps if necessary and sufficient and give up if not.
    2336              : 
    2337              :          If there is a combination of the access not covering the full
    2338              :          vector and a gap recorded then we may need to peel twice.  */
    2339      1300648 :       bool large_vector_overrun_p = false;
    2340      1300648 :       if (loop_vinfo
    2341       521510 :           && (*memory_access_type == VMAT_CONTIGUOUS
    2342        35396 :               || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    2343       491382 :           && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
    2344      1326634 :           && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
    2345              :                           nunits))
    2346              :         large_vector_overrun_p = overrun_p = true;
    2347              : 
    2348              :       /* If the gap splits the vector in half and the target
    2349              :          can do half-vector operations avoid the epilogue peeling
    2350              :          by simply loading half of the vector only.  Usually
    2351              :          the construction with an upper zero half will be elided.  */
    2352      1300648 :       dr_alignment_support alss;
    2353      1300648 :       int misalign = dr_misalignment (first_dr_info, vectype, off);
    2354      1300648 :       tree half_vtype;
    2355      1300648 :       poly_uint64 remain;
    2356      1300648 :       unsigned HOST_WIDE_INT tem, num;
    2357      1300648 :       if (overrun_p
    2358      1300648 :           && !masked_p
    2359        17422 :           && *memory_access_type != VMAT_LOAD_STORE_LANES
    2360        17422 :           && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
    2361              :                                                       vectype, misalign)))
    2362              :               == dr_aligned
    2363        14946 :               || alss == dr_unaligned_supported)
    2364         9838 :           && can_div_trunc_p (group_size
    2365         9838 :                               * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
    2366              :                               nunits, &tem, &remain)
    2367      1310486 :           && (known_eq (remain, 0u)
    2368         7383 :               || (known_ne (remain, 0u)
    2369         5726 :                   && constant_multiple_p (nunits, remain, &num)
    2370      1298193 :                   && (vector_vector_composition_type (vectype, num, &half_vtype)
    2371              :                       != NULL_TREE))))
    2372         8181 :         overrun_p = false;
    2373              : 
    2374      1300648 :       if (overrun_p && !can_overrun_p)
    2375              :         {
    2376            6 :           if (dump_enabled_p ())
    2377            6 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2378              :                              "Peeling for outer loop is not supported\n");
    2379            6 :           return false;
    2380              :         }
    2381              : 
    2382              :       /* Peeling for gaps assumes that a single scalar iteration
    2383              :          is enough to make sure the last vector iteration doesn't
    2384              :          access excess elements.  */
    2385      1300642 :       if (overrun_p
    2386      1300642 :           && (!can_div_trunc_p (group_size
    2387         9235 :                                 * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
    2388              :                                 nunits, &tem, &remain)
    2389         9235 :               || maybe_lt (remain + group_size, nunits)))
    2390              :         {
    2391              :           /* But peeling a single scalar iteration is enough if
    2392              :              we can use the next power-of-two sized partial
    2393              :              access and that is sufficiently small to be covered
    2394              :              by the single scalar iteration.  */
    2395           16 :           unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
    2396           16 :           if (masked_p
    2397           16 :               || !nunits.is_constant (&cnunits)
    2398           16 :               || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
    2399           16 :               || (((cremain = (group_size * cvf - gap) % cnunits), true)
    2400           16 :                   && ((cpart_size = (1 << ceil_log2 (cremain))), true)
    2401           16 :                   && (cremain + group_size < cpart_size
    2402           13 :                       || (vector_vector_composition_type (vectype,
    2403           13 :                                                          cnunits / cpart_size,
    2404              :                                                          &half_vtype)
    2405              :                           == NULL_TREE))))
    2406              :             {
    2407              :               /* If all fails we can still resort to niter masking unless
    2408              :                  the vectors used are too big, so enforce the use of
    2409              :                  partial vectors.  */
    2410            3 :               if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
    2411            3 :                   && !large_vector_overrun_p)
    2412              :                 {
    2413            0 :                   if (dump_enabled_p ())
    2414            0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2415              :                                      "peeling for gaps insufficient for "
    2416              :                                      "access unless using partial "
    2417              :                                      "vectors\n");
    2418            0 :                   LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
    2419              :                 }
    2420              :               else
    2421              :                 {
    2422            3 :                   if (dump_enabled_p ())
    2423            3 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2424              :                                      "peeling for gaps insufficient for "
    2425              :                                      "access\n");
    2426            3 :                   return false;
    2427              :                 }
    2428              :             }
    2429           13 :           else if (large_vector_overrun_p)
    2430              :             {
    2431           13 :               if (dump_enabled_p ())
    2432           12 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2433              :                                  "can't operate on partial vectors because "
    2434              :                                  "only unmasked loads handle access "
    2435              :                                  "shortening required because of gaps at "
    2436              :                                  "the end of the access\n");
    2437           13 :               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    2438              :             }
    2439              :         }
    2440              :     }
    2441              : 
    2442              :   /* As a last resort, try using a gather load or scatter store.
    2443              : 
    2444              :      ??? Although the code can handle all group sizes correctly,
    2445              :      it probably isn't a win to use separate strided accesses based
    2446              :      on nearby locations.  Or, even if it's a win over scalar code,
    2447              :      it might not be a win over vectorizing at a lower VF, if that
    2448              :      allows us to use contiguous accesses.  */
    2449      1353779 :   vect_memory_access_type grouped_gather_fallback = VMAT_UNINITIALIZED;
    2450      1353779 :   if (loop_vinfo
    2451       574641 :       && (*memory_access_type == VMAT_ELEMENTWISE
    2452       574641 :           || *memory_access_type == VMAT_STRIDED_SLP))
    2453              :     {
    2454        70624 :       gather_scatter_info gs_info;
    2455        70624 :       tree tem;
    2456        70624 :       if (SLP_TREE_LANES (slp_node) == 1
    2457        65820 :           && (!SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
    2458        21600 :               || single_element_p)
    2459       134416 :           && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
    2460              :                                                  masked_p, &gs_info, elsvals,
    2461              :                                                  group_size, single_element_p))
    2462              :         {
    2463              :           /* vect_use_strided_gather_scatters_p does not save the actually
    2464              :              supported scale and offset type so do that here.
    2465              :              We need it later in check_load_store_for_partial_vectors
    2466              :              where we only check if the given internal function is supported
    2467              :              (to choose whether to use the IFN, LEGACY, or EMULATED flavor
    2468              :              of gather/scatter) and don't re-do the full analysis.  */
    2469            0 :           tree tmp;
    2470            0 :           gcc_assert (vect_gather_scatter_fn_p
    2471              :                       (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
    2472              :                        gs_info.memory_type, TREE_TYPE (gs_info.offset),
    2473              :                        gs_info.scale, supported_scale, &gs_info.ifn,
    2474              :                        &tmp, supported_offset_vectype, elsvals));
    2475              : 
    2476            0 :           SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
    2477            0 :           SLP_TREE_GS_BASE (slp_node) = error_mark_node;
    2478            0 :           ls->gs.ifn = gs_info.ifn;
    2479            0 :           ls->strided_offset_vectype = gs_info.offset_vectype;
    2480            0 :           *memory_access_type = VMAT_GATHER_SCATTER_IFN;
    2481              :         }
    2482        70624 :       else if (SLP_TREE_LANES (slp_node) > 1
    2483              :                && !masked_p
    2484         4804 :                && !single_element_p
    2485        75235 :                && vect_use_grouped_gather (STMT_VINFO_DR_INFO (stmt_info),
    2486              :                                            vectype, loop_vinfo,
    2487              :                                            masked_p, group_size,
    2488              :                                            &gs_info, elsvals, &tem))
    2489              :         {
    2490            0 :           SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
    2491            0 :           SLP_TREE_GS_BASE (slp_node) = error_mark_node;
    2492            0 :           grouped_gather_fallback = *memory_access_type;
    2493            0 :           *memory_access_type = VMAT_GATHER_SCATTER_IFN;
    2494            0 :           ls->gs.ifn = gs_info.ifn;
    2495            0 :           vectype = *ls_type = tem;
    2496            0 :           ls->strided_offset_vectype = gs_info.offset_vectype;
    2497              :         }
    2498              :     }
    2499              : 
    2500      1353779 :   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
    2501      1353779 :       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    2502         6461 :     *poffset = neg_ldst_offset;
    2503              : 
    2504      1353779 :   if (*memory_access_type == VMAT_ELEMENTWISE
    2505      1328302 :       || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
    2506      1327728 :       || *memory_access_type == VMAT_STRIDED_SLP
    2507      1282511 :       || *memory_access_type == VMAT_INVARIANT)
    2508              :     {
    2509        74619 :       *alignment_support_scheme = dr_unaligned_supported;
    2510        74619 :       *misalignment = DR_MISALIGNMENT_UNKNOWN;
    2511              :     }
    2512              :   else
    2513              :     {
    2514      1279160 :       if (mat_gather_scatter_p (*memory_access_type)
    2515              :           && !first_dr_info)
    2516              :         *misalignment = DR_MISALIGNMENT_UNKNOWN;
    2517              :       else
    2518      1279160 :         *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
    2519      1279160 :       *alignment_support_scheme
    2520      1279160 :         = vect_supportable_dr_alignment
    2521      1279160 :            (vinfo, first_dr_info, vectype, *misalignment,
    2522      1279160 :             mat_gather_scatter_p (*memory_access_type));
    2523      1279160 :       if (grouped_gather_fallback != VMAT_UNINITIALIZED
    2524            0 :           && *alignment_support_scheme != dr_aligned
    2525            0 :           && *alignment_support_scheme != dr_unaligned_supported)
    2526              :         {
    2527              :           /* No supportable alignment for a grouped gather, fall back to the
    2528              :              original memory access type.  Even though VMAT_STRIDED_SLP might
    2529              :              also try aligned vector loads it can still choose vector
    2530              :              construction from scalars.  */
    2531            0 :           *memory_access_type = grouped_gather_fallback;
    2532            0 :           *alignment_support_scheme = dr_unaligned_supported;
    2533            0 :           *misalignment = DR_MISALIGNMENT_UNKNOWN;
    2534              :         }
    2535              :     }
    2536              : 
    2537      1353779 :   if (overrun_p)
    2538              :     {
    2539         9232 :       gcc_assert (can_overrun_p);
    2540         9232 :       if (dump_enabled_p ())
    2541          511 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2542              :                          "Data access with gaps requires scalar "
    2543              :                          "epilogue loop\n");
    2544         9232 :       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    2545              :     }
    2546              : 
    2547      1353779 :   if ((*memory_access_type == VMAT_ELEMENTWISE
    2548      1353779 :        || *memory_access_type == VMAT_STRIDED_SLP)
    2549              :       && !nunits.is_constant ())
    2550              :     {
    2551              :       if (dump_enabled_p ())
    2552              :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2553              :                          "Not using elementwise accesses due to variable "
    2554              :                          "vectorization factor.\n");
    2555              :       return false;
    2556              :     }
    2557              : 
    2558              :   /* Checks if all scalar iterations are known to be inbounds.  */
    2559      1353779 :   bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info));
    2560              : 
    2561              :   /* Check if we support the operation if early breaks are needed.  Here we
    2562              :      must ensure that we don't access any more than the scalar code would
    2563              :      have.  A masked operation would ensure this, so for these load types
    2564              :      force masking.  */
    2565      1353779 :   if (loop_vinfo
    2566       574641 :       && dr_safe_speculative_read_required (stmt_info)
    2567      1532874 :       && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    2568              :     {
    2569       179095 :       if (mat_gather_scatter_p (*memory_access_type)
    2570       179095 :           || *memory_access_type == VMAT_STRIDED_SLP)
    2571              :         {
    2572         9324 :           if (dump_enabled_p ())
    2573            8 :             dump_printf_loc (MSG_NOTE, vect_location,
    2574              :                              "early break not supported: cannot peel for "
    2575              :                              "alignment. With non-contiguous memory vectorization"
    2576              :                              " could read out of bounds at %G ",
    2577              :                              STMT_VINFO_STMT (stmt_info));
    2578         9324 :           if (inbounds)
    2579            0 :             LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
    2580              :           else
    2581              :             return false;
    2582              :         }
    2583              :       /* Block-level alignment: Even though individual accesses of
    2584              :          VMAT_ELEMENTWISE type do not cause alignment problems, loading the
    2585              :          whole vector's worth of values in a speculative early-break context
    2586              :          might cross a page boundary.  Set the alignment scheme to `dr_aligned'
    2587              :          here in order to force checking of whether such accesses meet
    2588              :          alignment criteria.  */
    2589       169771 :       else if (*memory_access_type == VMAT_ELEMENTWISE && !inbounds)
    2590        14873 :         *alignment_support_scheme = dr_aligned;
    2591              :     }
    2592              : 
    2593              :   /* If this DR needs alignment for correctness, we must ensure the target
    2594              :      alignment is a constant power-of-two multiple of the amount read per
    2595              :      vector iteration or force masking.  */
    2596      1344455 :   if (dr_safe_speculative_read_required (stmt_info)
    2597      1344455 :       && (*alignment_support_scheme == dr_aligned
    2598       102678 :           && !mat_gather_scatter_p (*memory_access_type)))
    2599              :     {
    2600              :       /* We can only peel for loops, of course.  */
    2601       102678 :       gcc_checking_assert (loop_vinfo);
    2602              : 
    2603       102678 :       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2604       102678 :       poly_uint64 read_amount
    2605       102678 :         = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    2606       102678 :       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2607       102678 :         read_amount *= group_size;
    2608              : 
    2609       102678 :       auto target_alignment
    2610       102678 :         = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
    2611       102678 :       if (!multiple_p (target_alignment, read_amount))
    2612              :         {
    2613        12708 :           if (dump_enabled_p ())
    2614              :             {
    2615           28 :               dump_printf_loc (MSG_NOTE, vect_location,
    2616              :                                "desired alignment not met, target was ");
    2617           28 :               dump_dec (MSG_NOTE, target_alignment);
    2618           28 :               dump_printf (MSG_NOTE, " previously, but read amount is ");
    2619           28 :               dump_dec (MSG_NOTE, read_amount);
    2620           28 :               dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
    2621              :             }
    2622        14897 :           return false;
    2623              :         }
    2624              : 
    2625              :       /* When using a group access the first element may be aligned but the
    2626              :          subsequent loads may not be.  For LOAD_LANES since the loads are based
    2627              :          on the first DR then all loads in the group are aligned.  For
    2628              :          non-LOAD_LANES this is not the case. In particular a load + blend when
    2629              :          there are gaps can have the non first loads issued unaligned, even
    2630              :          partially overlapping the memory of the first load in order to simplify
    2631              :          the blend.  This is what the x86_64 backend does for instance.  As
    2632              :          such only the first load in the group is aligned, the rest are not.
    2633              :          Because of this the permutes may break the alignment requirements that
    2634              :          have been set, and as such we should for now, reject them.  */
    2635        89970 :       if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    2636              :         {
    2637         2189 :           if (dump_enabled_p ())
    2638           75 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2639              :                              "loads with load permutations not supported for "
    2640              :                              "speculative early break loads for %G",
    2641              :                              STMT_VINFO_STMT (stmt_info));
    2642         2189 :           return false;
    2643              :         }
    2644              : 
    2645              :       /* Reject vectorization if we know the read amount per vector iteration
    2646              :          exceeds the min page size.  */
    2647        87781 :       if (known_gt (read_amount, (unsigned) param_min_pagesize))
    2648              :         {
    2649            0 :           if (dump_enabled_p ())
    2650              :             {
    2651            0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2652              :                                "alignment required for correctness (");
    2653            0 :               dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
    2654            0 :               dump_printf (MSG_NOTE, ") may exceed page size.\n");
    2655              :             }
    2656            0 :           return false;
    2657              :         }
    2658              : 
    2659        87781 :       if (!vf.is_constant ())
    2660              :         {
    2661              :           /* For VLA modes, we need a runtime check to ensure any speculative
    2662              :              read amount does not exceed the page size.  Here we record the max
    2663              :              possible read amount for the check.  */
    2664              :           if (maybe_gt (read_amount,
    2665              :                         LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
    2666              :             LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
    2667              : 
    2668              :           /* For VLA modes, we must use partial vectors.  */
    2669              :           LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
    2670              :         }
    2671              :     }
    2672              : 
    2673      1329558 :   if (*alignment_support_scheme == dr_unaligned_unsupported)
    2674              :     {
    2675        63763 :       if (dump_enabled_p ())
    2676          256 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2677              :                          "unsupported unaligned access\n");
    2678        63763 :       return false;
    2679              :     }
    2680              : 
    2681              :   /* FIXME: At the moment the cost model seems to underestimate the
    2682              :      cost of using elementwise accesses.  This check preserves the
    2683              :      traditional behavior until that can be fixed.  */
    2684      1265795 :   if (*memory_access_type == VMAT_ELEMENTWISE
    2685        14744 :       && !STMT_VINFO_STRIDED_P (first_stmt_info)
    2686      1280539 :       && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
    2687         9655 :            && single_element_p
    2688         9012 :            && !pow2p_hwi (group_size)))
    2689              :     {
    2690         9068 :       if (dump_enabled_p ())
    2691          362 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2692              :                          "not falling back to elementwise accesses\n");
    2693         9068 :       return false;
    2694              :     }
    2695              : 
    2696              :   /* For BB vectorization build up the vector from existing scalar defs.  */
    2697      1256727 :   if (!loop_vinfo && *memory_access_type == VMAT_ELEMENTWISE)
    2698              :     return false;
    2699              : 
    2700              :   /* Some loads need to explicitly permute the loaded data if there
    2701              :      is a load permutation.  Among those are:
    2702              :       - VMAT_ELEMENTWISE.
    2703              :       - VMAT_STRIDED_SLP.
    2704              :       - VMAT_GATHER_SCATTER:
    2705              :         - Strided gather (fallback for VMAT_STRIDED_SLP if #lanes == 1).
    2706              :         - Grouped strided gather (ditto but for #lanes > 1).
    2707              : 
    2708              :      For VMAT_ELEMENTWISE we can fold the load permutation into the
    2709              :      individual indices we access directly, eliding the permutation.
    2710              :      Strided gather only allows load permutations for the
    2711              :      single-element case.  */
    2712              : 
    2713      1256727 :   if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
    2714      1256727 :       && !(*memory_access_type == VMAT_ELEMENTWISE
    2715        44604 :            || (mat_gather_scatter_p (*memory_access_type)
    2716            0 :                && SLP_TREE_LANES (slp_node) == 1
    2717            0 :                && single_element_p)))
    2718              :     {
    2719        44604 :       if (!loop_vinfo)
    2720              :         {
    2721              :           /* In BB vectorization we may not actually use a loaded vector
    2722              :              accessing elements in excess of DR_GROUP_SIZE.  */
    2723        22792 :           stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
    2724        22792 :           group_info = DR_GROUP_FIRST_ELEMENT (group_info);
    2725        22792 :           unsigned HOST_WIDE_INT nunits;
    2726        22792 :           unsigned j, k, maxk = 0;
    2727        81736 :           FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
    2728        58944 :             if (k > maxk)
    2729              :               maxk = k;
    2730        22792 :           tree vectype = SLP_TREE_VECTYPE (slp_node);
    2731        41308 :           if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
    2732        22792 :               || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
    2733              :             {
    2734         4276 :               if (dump_enabled_p ())
    2735           29 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2736              :                                  "BB vectorization with gaps at the end of "
    2737              :                                  "a load is not supported\n");
    2738         4276 :               return false;
    2739              :             }
    2740              :         }
    2741              : 
    2742        40328 :       if (!perm_ok)
    2743              :         {
    2744         1988 :           if (dump_enabled_p ())
    2745            8 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION,
    2746              :                              vect_location,
    2747              :                              "unsupported load permutation\n");
    2748         1988 :           return false;
    2749              :         }
    2750              : 
    2751        38340 :       *slp_perm = true;
    2752              :     }
    2753              : 
    2754              :   return true;
    2755              : }
    2756              : 
    2757              : /* Return true if the operand of SLP_NODE at MASK_INDEX is suitable as the
    2758              :    mask of a vectorized conditional operation.  When returning true, store
    2759              :    the mask's SLP node in *MASK_NODE, the type of its definition in
    2760              :    *MASK_DT_OUT and the type of the vectorized mask in *MASK_VECTYPE_OUT.  */
    2761              : 
    2762              : static bool
    2763        12689 : vect_check_scalar_mask (vec_info *vinfo,
    2764              :                         slp_tree slp_node, unsigned mask_index,
    2765              :                         slp_tree *mask_node,
    2766              :                         vect_def_type *mask_dt_out, tree *mask_vectype_out)
    2767              : {
    2768        12689 :   enum vect_def_type mask_dt;
    2769        12689 :   tree mask_vectype;
    2770        12689 :   slp_tree mask_node_1;
    2771        12689 :   tree mask_;
    2772        12689 :   if (!vect_is_simple_use (vinfo, slp_node, mask_index,
    2773              :                            &mask_, &mask_node_1, &mask_dt, &mask_vectype))
    2774              :     {
    2775            0 :       if (dump_enabled_p ())
    2776            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2777              :                          "mask use not simple.\n");
    2778            0 :       return false;
    2779              :     }
    2780              : 
    2781        12689 :   if ((mask_dt == vect_constant_def || mask_dt == vect_external_def)
    2782        12689 :       && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_)))  /* Constant and external masks must be scalar booleans.  */
    2783              :     {
    2784            0 :       if (dump_enabled_p ())
    2785            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2786              :                          "mask argument is not a boolean.\n");
    2787            0 :       return false;
    2788              :     }
    2789              : 
    2790        12689 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
    2791        12689 :   if (!mask_vectype)  /* None came back from vect_is_simple_use: derive one from the data element type.  */
    2792           19 :     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
    2793              :                                                   mask_node_1);
    2794              : 
    2795        12689 :   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    2796              :     {
    2797            0 :       if (dump_enabled_p ())
    2798            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2799              :                          "could not find an appropriate vector mask type.\n");
    2800            0 :       return false;
    2801              :     }
    2802              : 
    2803        12689 :   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
    2804        25378 :                 TYPE_VECTOR_SUBPARTS (vectype)))  /* Mask and data vectors must have the same number of subparts.  */
    2805              :     {
    2806            0 :       if (dump_enabled_p ())
    2807            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2808              :                          "vector mask type %T"
    2809              :                          " does not match vector data type %T.\n",
    2810              :                          mask_vectype, vectype);
    2811              : 
    2812            0 :       return false;
    2813              :     }
    2814              : 
    2815        12689 :   *mask_dt_out = mask_dt;  /* The outputs are only written on this success path.  */
    2816        12689 :   *mask_vectype_out = mask_vectype;
    2817        12689 :   *mask_node = mask_node_1;
    2818        12689 :   return true;
    2819              : }
    2820              : 
    2821              : 
    2822              : /* Return true if the value stored by STMT_INFO is suitable for vectorizing
    2823              :    the store represented by SLP_NODE.  RHS_NODE is handed to
    2824              :    vect_is_simple_use to receive the stored value's SLP node.  When returning
    2825              :    true, store the type of the definition in *RHS_DT_OUT, the type of the
    2826              :    vectorized store value in *RHS_VECTYPE_OUT and the store kind in *VLS_TYPE_OUT.  */
    2827              : 
    2828              : static bool
    2829      1361097 : vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
    2830              :                       slp_tree slp_node, slp_tree *rhs_node,
    2831              :                       vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
    2832              :                       vec_load_store_type *vls_type_out)
    2833              : {
    2834      1361097 :   int op_no = 0;  /* Default operand index; replaced below for internal store calls.  */
    2835      1361097 :   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    2836              :     {
    2837         1899 :       if (gimple_call_internal_p (call)
    2838         1899 :           && internal_store_fn_p (gimple_call_internal_fn (call)))
    2839         1899 :         op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
    2840              :     }
    2841      1361097 :   op_no = vect_slp_child_index_for_operand (stmt_info, op_no);  /* The result is used as the operand index into SLP_NODE below.  */
    2842              : 
    2843      1361097 :   enum vect_def_type rhs_dt;
    2844      1361097 :   tree rhs_vectype;
    2845      1361097 :   tree rhs;
    2846      1361097 :   if (!vect_is_simple_use (vinfo, slp_node, op_no,
    2847              :                            &rhs, rhs_node, &rhs_dt, &rhs_vectype))
    2848              :     {
    2849            0 :       if (dump_enabled_p ())
    2850            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2851              :                          "use not simple.\n");
    2852            0 :       return false;
    2853              :     }
    2854              : 
    2855              :   /* In the case this is a store from a constant make sure
    2856              :      native_encode_expr can handle it.  */
    2857      1361097 :   if (rhs_dt == vect_constant_def
    2858      1361097 :       && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    2859              :     {
    2860            0 :       if (dump_enabled_p ())
    2861            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2862              :                          "cannot encode constant as a byte sequence.\n");
    2863            0 :       return false;
    2864              :     }
    2865              : 
    2866      1361097 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
    2867      1361097 :   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))  /* A missing RHS vector type is accepted as is.  */
    2868              :     {
    2869           24 :       if (dump_enabled_p ())
    2870           24 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2871              :                          "incompatible vector types.\n");
    2872           24 :       return false;
    2873              :     }
    2874              : 
    2875      1361073 :   *rhs_dt_out = rhs_dt;
    2876      1361073 :   *rhs_vectype_out = rhs_vectype;
    2877      1361073 :   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)  /* Constant and external values are stored as invariants.  */
    2878      1002757 :     *vls_type_out = VLS_STORE_INVARIANT;
    2879              :   else
    2880       358316 :     *vls_type_out = VLS_STORE;
    2881              :   return true;
    2882              : }
    2883              : 
    2884              : /* Build an all-ones mask of type MASKTYPE while vectorizing STMT_INFO: the
    2885              :    constant -1 for a scalar integer MASKTYPE, otherwise a vector set up via
    2886              :    vect_init_vector.  Float elements are built from an all-ones bit pattern.  */
    2887              : 
    2888              : static tree
    2889          170 : vect_build_all_ones_mask (vec_info *vinfo,
    2890              :                           stmt_vec_info stmt_info, tree masktype)
    2891              : {
    2892          170 :   if (TREE_CODE (masktype) == INTEGER_TYPE)  /* Scalar integer mask: no vector is built.  */
    2893           98 :     return build_int_cst (masktype, -1);
    2894           72 :   else if (VECTOR_BOOLEAN_TYPE_P (masktype)
    2895          144 :            || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    2896              :     {
    2897           19 :       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
    2898           19 :       mask = build_vector_from_val (masktype, mask);
    2899           19 :       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    2900              :     }
    2901           53 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    2902              :     {
    2903              :       REAL_VALUE_TYPE r;
    2904              :       long tmp[6];
    2905          371 :       for (int j = 0; j < 6; ++j)
    2906          318 :         tmp[j] = -1;  /* Every bit set in the target image handed to real_from_target.  */
    2907           53 :       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
    2908           53 :       tree mask = build_real (TREE_TYPE (masktype), r);
    2909           53 :       mask = build_vector_from_val (masktype, mask);
    2910           53 :       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    2911              :     }
    2912            0 :   gcc_unreachable ();  /* No other mask type is handled.  */
    2913              : }
    2914              : 
    2915              : /* Build an all-zero merge vector of type VECTYPE (integer or float elements
    2916              :    only) while vectorizing STMT_INFO as a gather load.  */
    2917              : 
    2918              : static tree
    2919          158 : vect_build_zero_merge_argument (vec_info *vinfo,
    2920              :                                 stmt_vec_info stmt_info, tree vectype)
    2921              : {
    2922          158 :   tree merge;
    2923          158 :   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    2924           49 :     merge = build_int_cst (TREE_TYPE (vectype), 0);
    2925          109 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    2926              :     {
    2927              :       REAL_VALUE_TYPE r;
    2928              :       long tmp[6];
    2929          763 :       for (int j = 0; j < 6; ++j)
    2930          654 :         tmp[j] = 0;  /* All-zero target image handed to real_from_target.  */
    2931          109 :       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
    2932          109 :       merge = build_real (TREE_TYPE (vectype), r);
    2933              :     }
    2934              :   else
    2935            0 :     gcc_unreachable ();  /* No other element type is handled.  */
    2936          158 :   merge = build_vector_from_val (vectype, merge);
    2937          158 :   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
    2938              : }
    2939              : 
    2940              : /* Return the else value of type TYPE for the MASK_LOAD_ELSE_* constant ELSVAL:
    2941              :    an SSA default definition of a fresh temporary, minus one, or zero.  */
    2942              : 
    2943              : tree
    2944         1944 : vect_get_mask_load_else (int elsval, tree type)
    2945              : {
    2946         1944 :   tree els;
    2947         1944 :   if (elsval == MASK_LOAD_ELSE_UNDEFINED)
    2948              :     {
    2949            0 :       tree tmp = create_tmp_var (type);
    2950              :       /* No need to warn about anything.  */
    2951            0 :       TREE_NO_WARNING (tmp) = 1;
    2952            0 :       els = get_or_create_ssa_default_def (cfun, tmp);
    2953              :     }
    2954         1944 :   else if (elsval == MASK_LOAD_ELSE_M1)
    2955            0 :     els = build_minus_one_cst (type);
    2956         1944 :   else if (elsval == MASK_LOAD_ELSE_ZERO)
    2957         1944 :     els = build_zero_cst (type);
    2958              :   else
    2959            0 :     gcc_unreachable ();  /* Any other ELSVAL is not handled.  */
    2960              : 
    2961         1944 :   return els;
    2962              : }
    2963              : 
    2964              : /* Build a call to gather load function DECL while vectorizing STMT_INFO.
    2965              :    Insert helper statements before GSI and return the final statement, which
    2966              :    is neither inserted nor given an lhs.  SLP_NODE supplies the scale and
    2967              :    VECTYPE the result type.  MASK is the vectorized condition of a conditional
    2968              :    load, else null.  PTR is the base pointer, OFFSET the vectorized offset.  */
    2969              : 
    2970              : static gimple *
    2971          346 : vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
    2972              :                                  slp_tree slp_node, tree vectype,
    2973              :                                  gimple_stmt_iterator *gsi, tree decl,
    2974              :                                  tree ptr, tree offset, tree mask)
    2975              : {
    2976          346 :   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));  /* Walked below in order: source, pointer, index, mask, scale.  */
    2977          346 :   tree rettype = TREE_TYPE (TREE_TYPE (decl));
    2978          346 :   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    2979          346 :   /* ptrtype */ arglist = TREE_CHAIN (arglist);
    2980          346 :   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    2981          346 :   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    2982          346 :   tree scaletype = TREE_VALUE (arglist);
    2983          346 :   tree var;
    2984          346 :   gcc_checking_assert (types_compatible_p (srctype, rettype)
    2985              :                        && (!mask
    2986              :                            || TREE_CODE (masktype) == INTEGER_TYPE
    2987              :                            || types_compatible_p (srctype, masktype)));
    2988              : 
    2989          346 :   tree op = offset;
    2990          346 :   if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))  /* View-convert OFFSET to the index type DECL takes.  */
    2991              :     {
    2992          100 :       gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
    2993              :                             TYPE_VECTOR_SUBPARTS (idxtype)));
    2994          100 :       var = vect_get_new_ssa_name (idxtype, vect_simple_var);
    2995          100 :       op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
    2996          100 :       gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
    2997          100 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    2998          100 :       op = var;
    2999              :     }
    3000              : 
    3001          346 :   tree src_op = NULL_TREE;
    3002          346 :   tree mask_op = NULL_TREE;
    3003          346 :   if (mask)
    3004              :     {
    3005          188 :       if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
    3006              :         {
    3007          188 :           tree utype, optype = TREE_TYPE (mask);
    3008          188 :           if (VECTOR_TYPE_P (masktype)
    3009          188 :               || TYPE_MODE (masktype) == TYPE_MODE (optype))
    3010              :             utype = masktype;
    3011              :           else
    3012            6 :             utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
    3013          188 :           var = vect_get_new_ssa_name (utype, vect_scalar_var);
    3014          188 :           tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
    3015          188 :           gassign *new_stmt
    3016          188 :               = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
    3017          188 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3018          188 :           mask_arg = var;
    3019          188 :           if (!useless_type_conversion_p (masktype, utype))  /* UTYPE is no wider than MASKTYPE (asserted below): convert with NOP_EXPR.  */
    3020              :             {
    3021            6 :               gcc_assert (TYPE_PRECISION (utype)
    3022              :                           <= TYPE_PRECISION (masktype));
    3023            6 :               var = vect_get_new_ssa_name (masktype, vect_scalar_var);
    3024            6 :               new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
    3025            6 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3026            6 :               mask_arg = var;
    3027              :             }
    3028          188 :           src_op = build_zero_cst (srctype);
    3029          188 :           mask_op = mask_arg;
    3030              :         }
    3031              :       else
    3032              :         {
    3033              :           src_op = mask;  /* MASK needs no conversion; it is passed as the merge source as well.  */
    3034              :           mask_op = mask;
    3035              :         }
    3036              :     }
    3037              :   else
    3038              :     {
    3039          158 :       src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);  /* Unconditional load: zero merge value and all-ones mask.  */
    3040          158 :       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
    3041              :     }
    3042              : 
    3043          346 :   tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
    3044          346 :   gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
    3045              :                                         mask_op, scale);
    3046              : 
    3047          346 :   if (!useless_type_conversion_p (vectype, rettype))  /* DECL's return type differs from VECTYPE: view-convert the result.  */
    3048              :     {
    3049           49 :       gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
    3050              :                             TYPE_VECTOR_SUBPARTS (rettype)));
    3051           49 :       op = vect_get_new_ssa_name (rettype, vect_simple_var);
    3052           49 :       gimple_call_set_lhs (new_stmt, op);
    3053           49 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3054           49 :       op = build1 (VIEW_CONVERT_EXPR, vectype, op);
    3055           49 :       new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);  /* The lhs is left null here; presumably the caller sets it -- TODO confirm.  */
    3056              :     }
    3057              : 
    3058          346 :   return new_stmt;
    3059              : }
    3060              : 
    3061              : /* Build a scatter store call to DECL while vectorizing STMT_INFO.  Insert
    3062              :    new conversion statements before GSI; the call itself is only returned.
    3063              :    PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
    3064              :    vectorized data to store.  SLP_NODE provides the scale argument.
    3065              :    If the store is conditional, MASK is the vectorized condition, otherwise
    3066              :    MASK is null.  */
    3067              : 
    3068              : static gimple *
    3069          161 : vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
    3070              :                                    slp_tree slp_node,
    3071              :                                    gimple_stmt_iterator *gsi,
    3072              :                                    tree decl,
    3073              :                                    tree ptr, tree offset, tree oprnd, tree mask)
    3074              : {
    3075          161 :   tree rettype = TREE_TYPE (TREE_TYPE (decl));
    3076          161 :   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
    3077          161 :   /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
    3078          161 :   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    3079          161 :   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    3080          161 :   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
    3081          161 :   tree scaletype = TREE_VALUE (arglist);
    3082          161 :   gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
    3083              :                        && TREE_CODE (rettype) == VOID_TYPE);
    3084              : 
    3085          161 :   tree mask_arg = NULL_TREE;
    3086          161 :   if (mask)
    3087              :     {
    3088          110 :       mask_arg = mask;
    3089          110 :       tree optype = TREE_TYPE (mask_arg);
    3090          110 :       tree utype;
    3091          110 :       if (TYPE_MODE (masktype) == TYPE_MODE (optype))
    3092              :         utype = masktype;
    3093              :       else
    3094            8 :         utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
    3095          110 :       tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
    3096          110 :       mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
    3097          110 :       gassign *new_stmt
    3098          110 :         = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
    3099          110 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3100          110 :       mask_arg = var;
    3101          110 :       if (!useless_type_conversion_p (masktype, utype))
    3102              :         {
    3103            8 :           gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
    3104            8 :           tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
    3105            8 :           new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
    3106            8 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3107            8 :           mask_arg = var;
    3108              :         }
    3109              :     }
    3110              :   else
    3111              :     {
    3112           51 :       mask_arg = build_int_cst (masktype, -1);  /* No MASK: use all-ones.  */
    3113           51 :       mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
    3114              :     }
    3115              : 
    3116          161 :   tree src = oprnd;
    3117          161 :   if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
    3118              :     {
    3119            0 :       gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
    3120              :                             TYPE_VECTOR_SUBPARTS (srctype)));
    3121            0 :       tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
    3122            0 :       src = build1 (VIEW_CONVERT_EXPR, srctype, src);
    3123            0 :       gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
    3124            0 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3125            0 :       src = var;
    3126              :     }
    3127              : 
    3128          161 :   tree op = offset;
    3129          161 :   if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
    3130              :     {
    3131           16 :       gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
    3132              :                             TYPE_VECTOR_SUBPARTS (idxtype)));
    3133           16 :       tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
    3134           16 :       op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
    3135           16 :       gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
    3136           16 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3137           16 :       op = var;
    3138              :     }
    3139              : 
    3140          161 :   tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
    3141          161 :   gcall *new_stmt
    3142          161 :     = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
    3143          161 :   return new_stmt;
    3144              : }
    3145              : 
    3146              : /* Prepare the base and offsets of gather/scatter SLP_NODE for vectorization.
    3147              :    Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
    3148              :    with the vectorized defs of the first SLP child of SLP_NODE.
    3149              :    Statements needed to compute the base are inserted on the preheader
    3150              :    edge of LOOP, the containing loop.  */
    3151              : 
    3152              : static void
    3153         1236 : vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
    3154              :                              tree *dataref_ptr, vec<tree> *vec_offset)
    3155              : {
    3156         1236 :   gimple_seq stmts = NULL;
    3157         1236 :   *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
    3158              :                                        &stmts, true, NULL_TREE);
    3159         1236 :   if (stmts != NULL)
    3160              :     {
    3161         1003 :       basic_block new_bb;
    3162         1003 :       edge pe = loop_preheader_edge (loop);
    3163         1003 :       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
    3164         1003 :       gcc_assert (!new_bb);
    3165              :     }
    3166         1236 :   vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
    3167         1236 : }
    3168              : 
    3169              : /* Prepare to implement a grouped or strided load or store using
    3170              :    the gather load or scatter store operation described by NODE.
    3171              :    STMT_INFO is the load or store statement.
    3172              : 
    3173              :    Set *DATAREF_BUMP to the amount that should be added to the base
    3174              :    address after each copy of the vectorized statement.  Set *VEC_OFFSET
    3175              :    to the scalar DR_STEP if the strided load/store IFN supports VECTYPE,
    3176              :    else to an invariant vector whose element I is I * DR_STEP / SCALE.  */
    3177              : 
    3178              : static void
    3179            0 : vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
    3180              :                                  tree vectype, tree offset_vectype,
    3181              :                                  loop_vec_info loop_vinfo,
    3182              :                                  gimple_stmt_iterator *gsi,
    3183              :                                  tree *dataref_bump, tree *vec_offset,
    3184              :                                  vec_loop_lens *loop_lens)
    3185              : {
    3186            0 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    3187              : 
    3188            0 :   if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    3189              :     {
    3190              :       /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
    3191              :          ivtmp_8 = _31 * 16 (step in bytes);
    3192              :          .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
    3193              :          vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
    3194            0 :       tree loop_len
    3195            0 :         = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
    3196            0 :       tree tmp
    3197            0 :         = fold_build2 (MULT_EXPR, sizetype,
    3198              :                        fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
    3199              :                        loop_len);
    3200            0 :       *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
    3201              :                                                 GSI_SAME_STMT);
    3202              :     }
    3203              :   else
    3204              :     {
    3205            0 :       tree bump
    3206            0 :         = size_binop (MULT_EXPR,
    3207              :                       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
    3208              :                       size_int (TYPE_VECTOR_SUBPARTS (vectype)));
    3209            0 :       *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
    3210              :     }
    3211              : 
    3212            0 :   internal_fn ifn
    3213            0 :     = DR_IS_READ (dr) ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
    3214            0 :   if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    3215              :     {
    3216            0 :       *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
    3217              :                                                    unshare_expr (DR_STEP (dr)));
    3218            0 :       return;
    3219              :     }
    3220              : 
    3221              :   /* The scalar offset type may be a pointer type, so use the element
    3222              :      type of OFFSET_VECTYPE instead.  */
    3223            0 :   tree offset_type = TREE_TYPE (offset_vectype);
    3224              : 
    3225              :   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
    3226            0 :   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
    3227              :                           ssize_int (SLP_TREE_GS_SCALE (node)));
    3228            0 :   step = fold_convert (offset_type, step);
    3229              : 
    3230              :   /* Create {0, X, X*2, X*3, ...}.  */
    3231            0 :   tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
    3232              :                              build_zero_cst (offset_type), step);
    3233            0 :   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
    3234              : }
    3235              : 
    3236              : /* Prepare the pointer IVs which need to be updated by a variable amount.
    3237              :    That variable amount is the outcome of .SELECT_VL.  In this case, each
    3238              :    iteration may process a flexible number of elements as long as the
    3239              :    number is <= VF.
    3240              : 
    3241              :    Return the pointer increment, the loop length times the DR step, as a
    3242              :    new SSA name whose defining statement is inserted before GSI.  */
    3243              : 
    3244              : static tree
    3245            0 : vect_get_loop_variant_data_ptr_increment (
    3246              :   vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
    3247              :   vec_loop_lens *loop_lens, dr_vec_info *dr_info,
    3248              :   vect_memory_access_type memory_access_type)
    3249              : {
    3250            0 :   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
    3251            0 :   tree step = vect_dr_behavior (vinfo, dr_info)->step;
    3252              : 
    3253              :   /* Gather/scatter accesses never reach here.  */
    3254            0 :   gcc_assert (!mat_gather_scatter_p (memory_access_type));
    3255              : 
    3256              :   /* When the SELECT_VL pattern is used, we dynamically adjust
    3257              :      the memory address by the .SELECT_VL result.
    3258              : 
    3259              :      The result of .SELECT_VL is the number of elements to
    3260              :      be processed in each iteration.  So the memory address
    3261              :      adjustment operation should be:
    3262              : 
    3263              :      addr = addr + .SELECT_VL (ARG..) * step;
    3264              :   */
    3265            0 :   tree loop_len
    3266            0 :     = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
    3267            0 :   tree len_type = TREE_TYPE (loop_len);
    3268              :   /* Since the outcome of .SELECT_VL is a number of elements, multiply
    3269              :      it by the step so that it can be used as the variable amount by
    3270              :      which the address pointer IVs are adjusted.  */
    3271            0 :   tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
    3272              :                           wide_int_to_tree (len_type, wi::to_widest (step)));
    3273            0 :   tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
    3274            0 :   gassign *assign = gimple_build_assign (bump, tmp);
    3275            0 :   gsi_insert_before (gsi, assign, GSI_SAME_STMT);
    3276            0 :   return bump;
    3277              : }
    3278              : 
    3279              : /* Return the amount that should be added to a vector pointer to move
    3280              :    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
    3281              :    being vectorized and MEMORY_ACCESS_TYPE describes the type of vectorization.
    3282              :    GSI and LOOP_LENS are only used when the loop uses .SELECT_VL.  */
    3283              : 
    3284              : static tree
    3285       703494 : vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
    3286              :                              dr_vec_info *dr_info, tree aggr_type,
    3287              :                              vect_memory_access_type memory_access_type,
    3288              :                              vec_loop_lens *loop_lens)
    3289              : {
    3290       703494 :   if (memory_access_type == VMAT_INVARIANT)
    3291            0 :     return size_zero_node;
    3292              : 
    3293       703494 :   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
    3294       134869 :   if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    3295            0 :     return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
    3296              :                                                      loop_lens, dr_info,
    3297            0 :                                                      memory_access_type);
    3298              : 
    3299       703494 :   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
    3300       703494 :   tree step = vect_dr_behavior (vinfo, dr_info)->step;
    3301       703494 :   if (tree_int_cst_sgn (step) == -1)  /* Negative step: move backwards.  */
    3302         2842 :     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
    3303              :   return iv_step;
    3304              : }
    3305              : 
    3306              : /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  If COST_VEC is nonnull only analyze and record costs, otherwise emit a byte permute per vector operand.  */
    3307              : 
    3308              : static bool
    3309          126 : vectorizable_bswap (vec_info *vinfo,
    3310              :                     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    3311              :                     slp_tree slp_node,
    3312              :                     slp_tree *slp_op,
    3313              :                     tree vectype_in, stmt_vector_for_cost *cost_vec)
    3314              : {
    3315          126 :   tree op, vectype;
    3316          126 :   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
    3317              : 
    3318          126 :   op = gimple_call_arg (stmt, 0);
    3319          126 :   vectype = SLP_TREE_VECTYPE (slp_node);
    3320          126 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    3321              : 
    3322          126 :   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
    3323              :     {
    3324            0 :       if (dump_enabled_p ())
    3325            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3326              :                          "mismatched vector sizes %T and %T\n",
    3327              :                          vectype_in, vectype);
    3328            0 :       return false;
    3329              :     }
    3330              : 
    3331          126 :   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
    3332          126 :   if (! char_vectype)
    3333              :     return false;
    3334              : 
    3335          126 :   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
    3336          126 :   unsigned word_bytes;
    3337          126 :   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
    3338              :     return false;
    3339              : 
    3340              :   /* The encoding uses one stepped pattern for each byte in the word.  */
    3341          126 :   vec_perm_builder elts (num_bytes, word_bytes, 3);
    3342          504 :   for (unsigned i = 0; i < 3; ++i)
    3343         2274 :     for (unsigned j = 0; j < word_bytes; ++j)
    3344         1896 :       elts.quick_push ((i + 1) * word_bytes - j - 1);
    3345              : 
    3346          126 :   vec_perm_indices indices (elts, 1, num_bytes);
    3347          126 :   machine_mode vmode = TYPE_MODE (char_vectype);
    3348          126 :   if (!can_vec_perm_const_p (vmode, vmode, indices))
    3349              :     return false;
    3350              : 
    3351           59 :   if (cost_vec)
    3352              :     {
    3353           47 :       if (!vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
    3354              :         {
    3355            0 :           if (dump_enabled_p ())
    3356            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3357              :                              "incompatible vector types for invariants\n");
    3358            0 :           return false;
    3359              :         }
    3360              : 
    3361           47 :       SLP_TREE_TYPE (slp_node) = call_vec_info_type;
    3362           47 :       DUMP_VECT_SCOPE ("vectorizable_bswap");
    3363           47 :       record_stmt_cost (cost_vec,
    3364              :                         1, vector_stmt, slp_node, 0, vect_prologue);
    3365           47 :       record_stmt_cost (cost_vec,
    3366           47 :                         vect_get_num_copies (vinfo, slp_node),
    3367              :                         vec_perm, slp_node, 0, vect_body);
    3368           47 :       return true;
    3369              :     }
    3370              : 
    3371           12 :   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
    3372              : 
    3373              :   /* Transform.  */
    3374           12 :   vec<tree> vec_oprnds = vNULL;
    3375           12 :   vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
    3376              :   /* Arguments are ready.  Create the new vector stmts.  */
    3377           12 :   unsigned i;
    3378           12 :   tree vop;
    3379           24 :   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    3380              :     {
    3381           12 :       gimple *new_stmt;
    3382           12 :       tree tem = make_ssa_name (char_vectype);
    3383           12 :       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
    3384              :                                                    char_vectype, vop));
    3385           12 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3386           12 :       tree tem2 = make_ssa_name (char_vectype);
    3387           12 :       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
    3388              :                                       tem, tem, bswap_vconst);
    3389           12 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3390           12 :       tem = make_ssa_name (vectype);
    3391           12 :       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
    3392              :                                                    vectype, tem2));
    3393           12 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3394           12 :       slp_node->push_vec_def (new_stmt);
    3395              :     }
    3396              : 
    3397           12 :   vec_oprnds.release ();
    3398           12 :   return true;
    3399          126 : }
    3400              : 
    3401              : /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
    3402              :    integral elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
    3403              :    in a single step.  On success, store the binary pack code in
    3404              :    *CONVERT_CODE; on failure *CONVERT_CODE is left unchanged.  */
    3405              : 
    3406              : static bool
    3407          184 : simple_integer_narrowing (tree vectype_out, tree vectype_in,
    3408              :                           code_helper *convert_code)
    3409              : {
    3410          368 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
    3411          368 :       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    3412              :     return false;
    3413              : 
    3414           74 :   code_helper code;
    3415           74 :   int multi_step_cvt = 0;
    3416           74 :   auto_vec <tree, 8> interm_types;
    3417          107 :   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
    3418              :                                         &code, &multi_step_cvt, &interm_types)
    3419           74 :       || multi_step_cvt)
    3420           33 :     return false;
    3421              : 
    3422           41 :   *convert_code = code;
    3423           41 :   return true;
    3424           74 : }
    3425              : 
    3426              : /* Function vectorizable_call.
    3427              : 
    3428              :    Check if STMT_INFO performs a function call that can be vectorized.
    3429              :    If COST_VEC is passed, calculate costs but don't change anything,
    3430              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    3431              :    it, and insert it at GSI.
    3432              :    Return true if STMT_INFO is vectorizable in this way.  */
    3433              : 
    3434              : static bool
    3435      2667715 : vectorizable_call (vec_info *vinfo,
    3436              :                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    3437              :                    slp_tree slp_node,
    3438              :                    stmt_vector_for_cost *cost_vec)
    3439              : {
    3440      2667715 :   gcall *stmt;
    3441      2667715 :   tree vec_dest;
    3442      2667715 :   tree scalar_dest;
    3443      2667715 :   tree op;
    3444      2667715 :   tree vec_oprnd0 = NULL_TREE;
    3445      2667715 :   tree vectype_out, vectype_in;
    3446      2667715 :   poly_uint64 nunits_in;
    3447      2667715 :   poly_uint64 nunits_out;
    3448      2667715 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    3449      2667715 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    3450      2667715 :   tree fndecl, new_temp, rhs_type;
    3451      2667715 :   enum vect_def_type dt[5]
    3452              :     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
    3453              :         vect_unknown_def_type, vect_unknown_def_type };
    3454      2667715 :   tree vectypes[ARRAY_SIZE (dt)] = {};
    3455      2667715 :   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
    3456      2667715 :   auto_vec<tree, 8> vargs;
    3457      2667715 :   enum { NARROW, NONE, WIDEN } modifier;
    3458      2667715 :   size_t i, nargs;
    3459      2667715 :   tree clz_ctz_arg1 = NULL_TREE;
    3460              : 
    3461      2667715 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    3462              :     return false;
    3463              : 
    3464      2667715 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    3465       234683 :       && cost_vec)
    3466              :     return false;
    3467              : 
    3468              :   /* Is STMT_INFO a vectorizable call?   */
    3469      2681451 :   stmt = dyn_cast <gcall *> (stmt_info->stmt);
    3470        24880 :   if (!stmt)
    3471              :     return false;
    3472              : 
    3473        24880 :   if (gimple_call_internal_p (stmt)
    3474        24880 :       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
    3475        16468 :           || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    3476              :     /* Handled by vectorizable_load and vectorizable_store.  */
    3477         3815 :     return false;
    3478              : 
    3479        21065 :   if (gimple_call_lhs (stmt) == NULL_TREE
    3480        21065 :       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    3481              :     return false;
    3482              : 
    3483        21059 :   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
    3484              : 
    3485        21059 :   vectype_out = SLP_TREE_VECTYPE (slp_node);
    3486              : 
    3487              :   /* Process function arguments.  */
    3488        21059 :   rhs_type = NULL_TREE;
    3489        21059 :   vectype_in = NULL_TREE;
    3490        21059 :   nargs = gimple_call_num_args (stmt);
    3491              : 
    3492              :   /* Bail out if the function has more than five arguments; we do not have
    3493              :      interesting builtin functions to vectorize with more than two arguments
    3494              :      except for fma (cond_fma has more).  No arguments is also not good.  */
    3495        21059 :   if (nargs == 0 || nargs > 5)
    3496              :     return false;
    3497              : 
    3498              :   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
    3499        20979 :   combined_fn cfn = gimple_call_combined_fn (stmt);
    3500        20979 :   if (cfn == CFN_GOMP_SIMD_LANE)
    3501              :     {
    3502         3207 :       nargs = 0;
    3503         3207 :       rhs_type = unsigned_type_node;
    3504              :     }
    3505              :   /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
    3506              :      argument just says whether it is well-defined at zero or not and what
    3507              :      value should be returned for it.  */
    3508        20979 :   if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
    3509              :     {
    3510          168 :       nargs = 1;
    3511          168 :       clz_ctz_arg1 = gimple_call_arg (stmt, 1);
    3512              :     }
    3513              : 
    3514        20979 :   int mask_opno = -1;
    3515        20979 :   if (internal_fn_p (cfn))
    3516              :     {
    3517              :       /* We can only handle direct internal masked calls here,
    3518              :          vectorizable_simd_clone_call is for the rest.  */
    3519        17985 :       if (cfn == CFN_MASK_CALL)
    3520              :         return false;
    3521        17831 :       mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
    3522              :     }
    3523              : 
    3524        65773 :   for (i = 0; i < nargs; i++)
    3525              :     {
    3526        46234 :       if ((int) i == mask_opno)
    3527              :         {
    3528         7694 :           if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno,
    3529              :                                        &slp_op[i], &dt[i], &vectypes[i]))
    3530              :             return false;
    3531         7694 :           continue;
    3532              :         }
    3533              : 
    3534        38540 :       if (!vect_is_simple_use (vinfo, slp_node,
    3535              :                                i, &op, &slp_op[i], &dt[i], &vectypes[i]))
    3536              :         {
    3537            0 :           if (dump_enabled_p ())
    3538            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3539              :                              "use not simple.\n");
    3540            0 :           return false;
    3541              :         }
    3542              : 
    3543              :       /* We can only handle calls with arguments of the same type.  */
    3544        38540 :       if (rhs_type
    3545        38540 :           && !types_compatible_p (rhs_type, TREE_TYPE (op)))
    3546              :         {
    3547         1286 :           if (dump_enabled_p ())
    3548          200 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3549              :                              "argument types differ.\n");
    3550         1286 :           return false;
    3551              :         }
    3552        37254 :       if (!rhs_type)
    3553        17618 :         rhs_type = TREE_TYPE (op);
    3554              : 
    3555        37254 :       if (!vectype_in)
    3556        18158 :         vectype_in = vectypes[i];
    3557        19096 :       else if (vectypes[i]
    3558        19096 :                && !types_compatible_p (vectypes[i], vectype_in))
    3559              :         {
    3560            0 :           if (dump_enabled_p ())
    3561            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3562              :                              "argument vector types differ.\n");
    3563            0 :           return false;
    3564              :         }
    3565              :     }
    3566              :   /* If all arguments are external or constant defs, infer the vector type
    3567              :      from the scalar type.  */
    3568        19539 :   if (!vectype_in)
    3569         5504 :     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
    3570        19539 :   if (!cost_vec)
    3571         4193 :     gcc_assert (vectype_in);
    3572        15346 :   if (!vectype_in)
    3573              :     {
    3574         1029 :       if (dump_enabled_p ())
    3575            4 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3576              :                          "no vectype for scalar type %T\n", rhs_type);
    3577              : 
    3578         1029 :       return false;
    3579              :     }
    3580              : 
    3581        37020 :   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
    3582        18510 :       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    3583              :     {
    3584           12 :       if (dump_enabled_p ())
    3585           12 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3586              :                          "mixed mask and nonmask vector types\n");
    3587           12 :       return false;
    3588              :     }
    3589              : 
    3590        18498 :   if (vect_emulated_vector_p (vectype_in)
    3591        18498 :       || vect_emulated_vector_p (vectype_out))
    3592              :     {
    3593            0 :       if (dump_enabled_p ())
    3594            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3595              :                          "use emulated vector type for call\n");
    3596            0 :       return false;
    3597              :     }
    3598              : 
    3599              :   /* FORNOW */
    3600        18498 :   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
    3601        18498 :   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    3602        18498 :   if (known_eq (nunits_in * 2, nunits_out))
    3603              :     modifier = NARROW;
    3604        17927 :   else if (known_eq (nunits_out, nunits_in))
    3605              :     modifier = NONE;
    3606           45 :   else if (known_eq (nunits_out * 2, nunits_in))
    3607              :     modifier = WIDEN;
    3608              :   else
    3609              :     return false;
    3610              : 
    3611              :   /* We only handle functions that do not read or clobber memory.  */
    3612        36996 :   if (gimple_vuse (stmt))
    3613              :     {
    3614         1218 :       if (dump_enabled_p ())
    3615           14 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3616              :                          "function reads from or writes to memory.\n");
    3617         1218 :       return false;
    3618              :     }
    3619              : 
    3620              :   /* For now, we only vectorize functions if a target specific builtin
    3621              :      is available.  TODO -- in some cases, it might be profitable to
    3622              :      insert the calls for pieces of the vector, in order to be able
    3623              :      to vectorize other operations in the loop.  */
    3624        17280 :   fndecl = NULL_TREE;
    3625        17280 :   internal_fn ifn = IFN_LAST;
    3626        17280 :   tree callee = gimple_call_fndecl (stmt);
    3627              : 
    3628              :   /* First try using an internal function.  */
    3629        17280 :   code_helper convert_code = MAX_TREE_CODES;
    3630        17280 :   if (cfn != CFN_LAST
    3631        17280 :       && (modifier == NONE
    3632          196 :           || (modifier == NARROW
    3633          184 :               && simple_integer_narrowing (vectype_out, vectype_in,
    3634              :                                            &convert_code))))
    3635        16255 :     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
    3636              :                                           vectype_in);
    3637              : 
    3638              :   /* Check if the operation traps.  */
    3639        17280 :   bool could_trap = gimple_could_trap_p (STMT_VINFO_STMT (stmt_info));
    3640        17280 :   if (could_trap && cost_vec && loop_vinfo)
    3641              :     {
    3642              :       /* If the operation can trap it must be conditional, otherwise fail.  */
    3643          474 :       internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
    3644          474 :                              ? ifn : get_conditional_internal_fn (ifn));
    3645          474 :       internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
    3646          474 :       if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
    3647              :         {
    3648              :           /* We assume that BB SLP fills all lanes, so no inactive lanes can
    3649              :              cause issues.  */
    3650           84 :           if ((cond_fn == IFN_LAST
    3651           56 :                || !direct_internal_fn_supported_p (cond_fn, vectype_out,
    3652              :                                                    OPTIMIZE_FOR_SPEED))
    3653          140 :               && (cond_len_fn == IFN_LAST
    3654           56 :                   || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
    3655              :                                                       OPTIMIZE_FOR_SPEED)))
    3656              :             {
    3657           84 :               if (dump_enabled_p ())
    3658           10 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3659              :                                  "can't use a fully-masked loop because no"
    3660              :                                  " conditional operation is available.\n");
    3661           84 :               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    3662              :             }
    3663              :         }
    3664              :     }
    3665              : 
    3666              :   /* If that fails, try asking for a target-specific built-in function.  */
    3667        17280 :   if (ifn == IFN_LAST)
    3668              :     {
    3669         9849 :       if (cfn != CFN_LAST)
    3670         8979 :         fndecl = targetm.vectorize.builtin_vectorized_function
    3671         8979 :           (cfn, vectype_out, vectype_in);
    3672          870 :       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
    3673           24 :         fndecl = targetm.vectorize.builtin_md_vectorized_function
    3674           24 :           (callee, vectype_out, vectype_in);
    3675              :     }
    3676              : 
    3677        17280 :   if (ifn == IFN_LAST && !fndecl)
    3678              :     {
    3679         9469 :       if (cfn == CFN_GOMP_SIMD_LANE
    3680         3207 :           && SLP_TREE_LANES (slp_node) == 1
    3681         3207 :           && loop_vinfo
    3682         3207 :           && LOOP_VINFO_LOOP (loop_vinfo)->simduid
    3683         3207 :           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
    3684        15883 :           && LOOP_VINFO_LOOP (loop_vinfo)->simduid
    3685         3207 :              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
    3686              :         {
    3687              :           /* We can handle IFN_GOMP_SIMD_LANE by returning a
    3688              :              { 0, 1, 2, ... vf - 1 } vector.  */
    3689         3207 :           gcc_assert (nargs == 0);
    3690              :         }
    3691         6262 :       else if (modifier == NONE
    3692         6262 :                && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
    3693         5924 :                    || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
    3694         5869 :                    || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
    3695         5824 :                    || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
    3696          126 :         return vectorizable_bswap (vinfo, stmt_info, gsi, slp_node,
    3697          126 :                                    slp_op, vectype_in, cost_vec);
    3698              :       else
    3699              :         {
    3700         6136 :           if (dump_enabled_p ())
    3701          274 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3702              :                              "function is not vectorizable.\n");
    3703         6136 :           return false;
    3704              :         }
    3705              :     }
    3706              : 
    3707        11018 :   int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
    3708        11018 :   internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
    3709        11018 :                          ? ifn : get_conditional_internal_fn (ifn));
    3710        11018 :   internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
    3711        11018 :   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
    3712         9158 :   vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
    3713        11018 :   unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
    3714        11018 :   if (cost_vec) /* transformation not required.  */
    3715              :     {
    3716        21685 :       for (i = 0; i < nargs; ++i)
    3717        14848 :         if (!vect_maybe_update_slp_op_vectype (slp_op[i],
    3718        14848 :                                                vectypes[i]
    3719              :                                                ? vectypes[i] : vectype_in))
    3720              :           {
    3721            0 :             if (dump_enabled_p ())
    3722            0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3723              :                                "incompatible vector types for invariants\n");
    3724            0 :             return false;
    3725              :           }
    3726         6837 :       SLP_TREE_TYPE (slp_node) = call_vec_info_type;
    3727         6837 :       DUMP_VECT_SCOPE ("vectorizable_call");
    3728         6837 :       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
    3729              : 
    3730         6837 :       if (loop_vinfo
    3731         5903 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
    3732         4056 :           && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
    3733              :         {
    3734         2558 :           if (reduc_idx >= 0
    3735         1631 :               && (cond_fn == IFN_LAST
    3736         1631 :                   || !direct_internal_fn_supported_p (cond_fn, vectype_out,
    3737              :                                                       OPTIMIZE_FOR_SPEED))
    3738         2570 :               && (cond_len_fn == IFN_LAST
    3739           12 :                   || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
    3740              :                                                       OPTIMIZE_FOR_SPEED)))
    3741              :             {
    3742           12 :               if (dump_enabled_p ())
    3743            6 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3744              :                                  "can't use a fully-masked loop because no"
    3745              :                                  " conditional operation is available.\n");
    3746           12 :               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    3747              :             }
    3748              :           else
    3749              :             {
    3750         2546 :               tree scalar_mask = NULL_TREE;
    3751         2546 :               if (mask_opno >= 0)
    3752         2546 :                 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
    3753         2546 :               if (cond_len_fn != IFN_LAST
    3754         2546 :                   && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
    3755              :                                                      OPTIMIZE_FOR_SPEED))
    3756            0 :                 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
    3757              :                                       1);
    3758              :               else
    3759         2546 :                 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
    3760              :                                        scalar_mask);
    3761              :             }
    3762              :         }
    3763         6837 :       return true;
    3764              :     }
    3765              : 
    3766              :   /* Transform.  */
    3767              : 
    3768         4181 :   if (dump_enabled_p ())
    3769          416 :     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
    3770              : 
    3771              :   /* Handle def.  */
    3772         4181 :   scalar_dest = gimple_call_lhs (stmt);
    3773         4181 :   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
    3774              : 
    3775         4181 :   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
    3776         3255 :   bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
    3777         4181 :   unsigned int vect_nargs = nargs;
    3778         4181 :   if (len_loop_p && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
    3779              :     {
    3780            0 :       ifn = cond_len_fn;
    3781              :       /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS.  */
    3782            0 :       vect_nargs += 2;
    3783              :       /* But unless there's a mask argument already we need that
    3784              :          as well, and an else value.  */
    3785            0 :       if (mask_opno == -1)
    3786            0 :         vect_nargs += 2;
    3787              :     }
    3788         4181 :   else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))
    3789              :     {
    3790            0 :       ifn = cond_fn;
    3791            0 :       vect_nargs += 2;
    3792              :     }
    3793         4181 :   int len_opno = internal_fn_len_index (ifn);
    3794         4181 :   if (clz_ctz_arg1)
    3795           59 :     ++vect_nargs;
    3796              : 
    3797         4181 :   if (modifier == NONE || ifn != IFN_LAST)
    3798              :     {
    3799         4149 :       tree prev_res = NULL_TREE;
    3800         4149 :       vargs.safe_grow (vect_nargs, true);
    3801         4149 :       auto_vec<vec<tree> > vec_defs (nargs);
    3802              : 
    3803              :       /* Build argument list for the vectorized call.  */
    3804         4149 :       if (cfn == CFN_GOMP_SIMD_LANE)
    3805              :         {
    3806         3308 :           for (i = 0; i < nvectors; ++i)
    3807              :             {
    3808              :               /* ???  For multi-lane SLP we'd need to build
    3809              :                  { 0, 0, .., 1, 1, ... }.  */
    3810         1708 :               tree cst = build_index_vector (vectype_out,
    3811              :                                              i * nunits_out, 1);
    3812         1708 :               tree new_var
    3813         1708 :                 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
    3814         1708 :               gimple *init_stmt = gimple_build_assign (new_var, cst);
    3815         1708 :               vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
    3816         1708 :               new_temp = make_ssa_name (vec_dest);
    3817         1708 :               gimple *new_stmt = gimple_build_assign (new_temp, new_var);
    3818         1708 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3819         1708 :               slp_node->push_vec_def (new_stmt);
    3820              :             }
    3821              :         }
    3822              :       else
    3823              :         {
    3824         2549 :           vec<tree> vec_oprnds0;
    3825         2549 :           vect_get_slp_defs (vinfo, slp_node, &vec_defs);
    3826         2549 :           vec_oprnds0 = vec_defs[0];
    3827              : 
    3828              :           /* Arguments are ready.  Create the new vector stmt.  */
    3829         5251 :           FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
    3830              :             {
    3831         2702 :               int varg = 0;
    3832              :               /* Add the mask if necessary.  */
    3833           38 :               if ((masked_loop_p || len_loop_p) && mask_opno == -1
    3834         2704 :                   && internal_fn_mask_index (ifn) != -1)
    3835              :                 {
    3836            0 :                   gcc_assert (internal_fn_mask_index (ifn) == varg);
    3837            0 :                   if (masked_loop_p)
    3838              :                     {
    3839            0 :                       unsigned int vec_num = vec_oprnds0.length ();
    3840            0 :                       vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi,
    3841              :                                                           masks, vec_num,
    3842              :                                                           vectype_out, i);
    3843              :                     }
    3844              :                   else
    3845              :                     {
    3846            0 :                       tree mask_vectype = truth_type_for (vectype_out);
    3847            0 :                       vargs[varg++] = vect_build_all_ones_mask (loop_vinfo,
    3848              :                                                                 stmt_info,
    3849              :                                                                 mask_vectype);
    3850              :                     }
    3851              :                 }
    3852              :               size_t k;
    3853         9911 :               for (k = 0; k < nargs; k++)
    3854              :                 {
    3855         7209 :                   vec<tree> vec_oprndsk = vec_defs[k];
    3856         7209 :                   vargs[varg++] = vec_oprndsk[i];
    3857              :                 }
    3858              :               /* Add the else value if necessary.  */
    3859           38 :               if ((masked_loop_p || len_loop_p) && mask_opno == -1
    3860         2704 :                   && internal_fn_else_index (ifn) != -1)
    3861              :                 {
    3862            0 :                   gcc_assert (internal_fn_else_index (ifn) == varg);
    3863            0 :                   if (reduc_idx >= 0)
    3864            0 :                     vargs[varg++] = vargs[reduc_idx + 1];
    3865              :                   else
    3866              :                     {
    3867            0 :                       auto else_value = targetm.preferred_else_value
    3868            0 :                         (ifn, vectype_out, varg - 1, &vargs[1]);
    3869            0 :                       vargs[varg++] = else_value;
    3870              :                     }
    3871              :                 }
    3872         2702 :               if (clz_ctz_arg1)
    3873           59 :                 vargs[varg++] = clz_ctz_arg1;
    3874              : 
    3875         2702 :               gimple *new_stmt;
    3876         2702 :               if (modifier == NARROW)
    3877              :                 {
    3878              :                   /* We don't define any narrowing conditional functions
    3879              :                      at present.  */
    3880            0 :                   gcc_assert (mask_opno < 0);
    3881            0 :                   tree half_res = make_ssa_name (vectype_in);
    3882            0 :                   gcall *call = gimple_build_call_internal_vec (ifn, vargs);
    3883            0 :                   gimple_call_set_lhs (call, half_res);
    3884            0 :                   gimple_call_set_nothrow (call, true);
    3885            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    3886            0 :                   if ((i & 1) == 0)
    3887              :                     {
    3888            0 :                       prev_res = half_res;
    3889            0 :                       continue;
    3890              :                     }
    3891            0 :                   new_temp = make_ssa_name (vec_dest);
    3892            0 :                   new_stmt = vect_gimple_build (new_temp, convert_code,
    3893              :                                                 prev_res, half_res);
    3894            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    3895              :                 }
    3896              :               else
    3897              :                 {
    3898         2702 :                   if (len_opno >= 0 && len_loop_p)
    3899              :                     {
    3900            0 :                       unsigned int vec_num = vec_oprnds0.length ();
    3901            0 :                       tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
    3902              :                                                     vec_num, vectype_out, i, 1, true);
    3903            0 :                       signed char biasval
    3904            0 :                         = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
    3905            0 :                       tree bias = build_int_cst (intQI_type_node, biasval);
    3906            0 :                       vargs[len_opno] = len;
    3907            0 :                       vargs[len_opno + 1] = bias;
    3908              :                     }
    3909         2702 :                   else if (mask_opno >= 0 && masked_loop_p)
    3910              :                     {
    3911           36 :                       unsigned int vec_num = vec_oprnds0.length ();
    3912           36 :                       tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
    3913              :                                                       vec_num, vectype_out, i);
    3914           36 :                       vargs[mask_opno]
    3915           72 :                         = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
    3916           36 :                                             vargs[mask_opno], gsi);
    3917              :                     }
    3918              : 
    3919         2702 :                   gcall *call;
    3920         2702 :                   if (ifn != IFN_LAST)
    3921         2621 :                     call = gimple_build_call_internal_vec (ifn, vargs);
    3922              :                   else
    3923           81 :                     call = gimple_build_call_vec (fndecl, vargs);
    3924         2702 :                   new_temp = make_ssa_name (vec_dest, call);
    3925         2702 :                   gimple_call_set_lhs (call, new_temp);
    3926         2702 :                   gimple_call_set_nothrow (call, true);
    3927         2702 :                   vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    3928         2702 :                   new_stmt = call;
    3929              :                 }
    3930         2702 :               slp_node->push_vec_def (new_stmt);
    3931              :             }
    3932              :         }
    3933              : 
    3934        10996 :       for (i = 0; i < nargs; i++)
    3935              :         {
    3936         6847 :           vec<tree> vec_oprndsi = vec_defs[i];
    3937         6847 :           vec_oprndsi.release ();
    3938              :         }
    3939         4149 :     }
    3940           32 :   else if (modifier == NARROW)
    3941              :     {
    3942           32 :       auto_vec<vec<tree> > vec_defs (nargs);
    3943              :       /* We don't define any narrowing conditional functions at present.  */
    3944           32 :       gcc_assert (mask_opno < 0);
    3945              : 
    3946              :       /* Build argument list for the vectorized call.  */
    3947           32 :       vargs.create (nargs * 2);
    3948              : 
    3949           32 :       vect_get_slp_defs (vinfo, slp_node, &vec_defs);
    3950           32 :       vec<tree> vec_oprnds0 = vec_defs[0];
    3951              : 
    3952              :       /* Arguments are ready.  Create the new vector stmt.  */
    3953           64 :       for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
    3954              :         {
    3955           32 :           size_t k;
    3956           32 :           vargs.truncate (0);
    3957           64 :           for (k = 0; k < nargs; k++)
    3958              :             {
    3959           32 :               vec<tree> vec_oprndsk = vec_defs[k];
    3960           32 :               vargs.quick_push (vec_oprndsk[i]);
    3961           32 :               vargs.quick_push (vec_oprndsk[i + 1]);
    3962              :             }
    3963           32 :           gcall *call;
    3964           32 :           if (ifn != IFN_LAST)
    3965              :             call = gimple_build_call_internal_vec (ifn, vargs);
    3966              :           else
    3967           32 :             call = gimple_build_call_vec (fndecl, vargs);
    3968           32 :           new_temp = make_ssa_name (vec_dest, call);
    3969           32 :           gimple_call_set_lhs (call, new_temp);
    3970           32 :           gimple_call_set_nothrow (call, true);
    3971           32 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    3972           32 :           slp_node->push_vec_def (call);
    3973              :         }
    3974              : 
    3975           64 :       for (i = 0; i < nargs; i++)
    3976              :         {
    3977           32 :           vec<tree> vec_oprndsi = vec_defs[i];
    3978           32 :           vec_oprndsi.release ();
    3979              :         }
    3980           32 :     }
    3981              :   else
    3982              :     /* No current target implements this case.  */
    3983              :     return false;
    3984              : 
    3985         4181 :   vargs.release ();
    3986              : 
    3987         4181 :   return true;
    3988      2667715 : }
    3989              : 
    3990              : /* Per-argument record; vect_simd_lane_linear below receives a pointer
                      :    to one and fills in OP, LINEAR_STEP and SIMD_LANE_LINEAR when it
                      :    recognizes an address that is linear in the simd lane.
                      :    NOTE(review): the meaning of the remaining fields is inferred from
                      :    their names and types only -- confirm against the users in
                      :    vectorizable_simd_clone_call.  */
    3991              : struct simd_call_arg_info
    3992              : {
    3993              :   tree vectype;  /* Presumably the vector type of the argument -- confirm.  */
    3994              :   tree op;  /* Operand; set to the invariant base by vect_simd_lane_linear.  */
    3995              :   HOST_WIDE_INT linear_step;  /* Step; vect_simd_lane_linear stores the lane multiplier, 1 if none.  */
    3996              :   enum vect_def_type dt;  /* Definition kind of the argument -- presumably; confirm.  */
    3997              :   unsigned int align;  /* Presumably a known alignment of the argument -- confirm units.  */
    3998              :   bool simd_lane_linear;  /* Set to true by vect_simd_lane_linear on a match.  */
    3999              : };
    4000              : 
    4001              : /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
    4002              :    is linear within simd lane (but not within whole loop), note it in
    4003              :    *ARGINFO.
                      :
                      :    OP is only recognized when it is defined as INVARIANT p+ V and,
                      :    walking the definitions of V, we see in this order: any number of
                      :    additions of an INTEGER_CST, at most one multiplication by a
                      :    nonzero constant that fits a HOST_WIDE_INT, any number of integer
                      :    conversions that do not narrow, and finally a call to
                      :    IFN_GOMP_SIMD_LANE whose argument is based on LOOP->simduid.
                      :
                      :    On a match ARGINFO->op is set to the invariant base with the
                      :    constant addends folded in, ARGINFO->linear_step to the multiplier
                      :    (1 if there is none) and ARGINFO->simd_lane_linear to true.  In
                      :    every other case *ARGINFO is left untouched.  */
    4004              : 
    4005              : static void
    4006           15 : vect_simd_lane_linear (tree op, class loop *loop,
    4007              :                        struct simd_call_arg_info *arginfo)
    4008              : {
    4009           15 :   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
    4010              : 
                      :   /* Only addresses of the form INVARIANT p+ OFFSET are of interest.  */
    4011           15 :   if (!is_gimple_assign (def_stmt)
    4012           15 :       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
    4013           27 :       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    4014            3 :     return;
    4015              : 
    4016           12 :   tree base = gimple_assign_rhs1 (def_stmt);
                      :   /* Zero means "no step determined yet"; once it is nonzero no further
                      :      addition or multiplication is accepted.  */
    4017           12 :   HOST_WIDE_INT linear_step = 0;
    4018           12 :   tree v = gimple_assign_rhs2 (def_stmt);
                      :   /* Peel the offset computation one defining statement at a time.  Every
                      :      path through the body either advances V and continues, or returns.  */
    4019           48 :   while (TREE_CODE (v) == SSA_NAME)
    4020              :     {
    4021           36 :       tree t;
    4022           36 :       def_stmt = SSA_NAME_DEF_STMT (v);
    4023           36 :       if (is_gimple_assign (def_stmt))
    4024           24 :         switch (gimple_assign_rhs_code (def_stmt))
    4025              :           {
    4026            0 :           case PLUS_EXPR:
                      :             /* A constant addend is folded into the invariant base.  */
    4027            0 :             t = gimple_assign_rhs2 (def_stmt);
    4028            0 :             if (linear_step || TREE_CODE (t) != INTEGER_CST)
    4029              :               return;
    4030            0 :             base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
    4031            0 :             v = gimple_assign_rhs1 (def_stmt);
    4032            0 :             continue;
    4033           12 :           case MULT_EXPR:
                      :             /* The constant multiplier becomes the per-lane step.  */
    4034           12 :             t = gimple_assign_rhs2 (def_stmt);
    4035           12 :             if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
    4036            0 :               return;
    4037           12 :             linear_step = tree_to_shwi (t);
    4038           12 :             v = gimple_assign_rhs1 (def_stmt);
    4039           12 :             continue;
    4040           12 :           CASE_CONVERT:
                      :             /* Look through conversions from an integer type that is not
                      :                wider than the result.  */
    4041           12 :             t = gimple_assign_rhs1 (def_stmt);
    4042           12 :             if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
    4043           12 :                 || (TYPE_PRECISION (TREE_TYPE (v))
    4044           12 :                     < TYPE_PRECISION (TREE_TYPE (t))))
    4045              :               return;
                      :             /* Fix the step so that nothing below the conversion can be
                      :                treated as an addend or multiplier any more.  */
    4046           12 :             if (!linear_step)
    4047            0 :               linear_step = 1;
    4048           12 :             v = t;
    4049           12 :             continue;
    4050              :           default:
    4051              :             return;
    4052              :           }
    4053           12 :       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
    4054           12 :                && loop->simduid
    4055           12 :                && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
    4056           24 :                && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
    4057              :                    == loop->simduid))
    4058              :         {
                      :           /* Reached the simd lane number of this loop: OP is
                      :              BASE + lane * LINEAR_STEP.  */
    4059           12 :           if (!linear_step)
    4060            0 :             linear_step = 1;
    4061           12 :           arginfo->linear_step = linear_step;
    4062           12 :           arginfo->op = base;
    4063           12 :           arginfo->simd_lane_linear = true;
    4064           12 :           return;
    4065              :         }
                      :       else
                      :         /* V is defined by something we cannot look through (a PHI, some
                      :            other call, a default definition, ...).  V would not change,
                      :            so give up instead of iterating forever.  */
                      :         return;
    4066              :     }
    4067              : }
    4068              : 
    4069              : /* Function vectorizable_simd_clone_call.
    4070              : 
    4071              :    Check if STMT_INFO performs a function call that can be vectorized
    4072              :    by calling a simd clone of the function.
    4073              :    If COST_VEC is passed, calculate costs but don't change anything,
    4074              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    4075              :    it, and insert it at GSI.
    4076              :    Return true if STMT_INFO is vectorizable in this way.  */
    4077              : 
    4078              : static bool
    4079      2657000 : vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
    4080              :                               gimple_stmt_iterator *gsi,
    4081              :                               slp_tree slp_node,
    4082              :                               stmt_vector_for_cost *cost_vec)
    4083              : {
    4084      2657000 :   tree vec_dest;
    4085      2657000 :   tree scalar_dest;
    4086      2657000 :   tree vec_oprnd0 = NULL_TREE;
    4087      2657000 :   tree vectype;
    4088      2657000 :   poly_uint64 nunits;
    4089      2657000 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    4090      2657000 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    4091      2657000 :   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
    4092      2657000 :   tree fndecl, new_temp;
    4093      2657000 :   int j;
    4094      2657000 :   auto_vec<simd_call_arg_info> arginfo;
    4095      2657000 :   vec<tree> vargs = vNULL;
    4096      2657000 :   size_t i, nargs;
    4097      2657000 :   tree rtype, ratype;
    4098      2657000 :   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
    4099      2657000 :   int masked_call_offset = 0;
    4100              : 
    4101              :   /* Is STMT a vectorizable call?   */
    4102      2657000 :   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
    4103        15354 :   if (!stmt)
    4104              :     return false;
    4105              : 
    4106        15354 :   fndecl = gimple_call_fndecl (stmt);
    4107        15354 :   if (fndecl == NULL_TREE
    4108        15354 :       && gimple_call_internal_p (stmt, IFN_MASK_CALL))
    4109              :     {
    4110          220 :       fndecl = gimple_call_arg (stmt, 0);
    4111          220 :       gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
    4112          220 :       fndecl = TREE_OPERAND (fndecl, 0);
    4113          220 :       gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
    4114              :       masked_call_offset = 1;
    4115              :     }
    4116        15134 :   if (fndecl == NULL_TREE)
    4117              :     return false;
    4118              : 
    4119         4921 :   struct cgraph_node *node = cgraph_node::get (fndecl);
    4120         4921 :   if (node == NULL || node->simd_clones == NULL)
    4121              :     return false;
    4122              : 
    4123         1476 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    4124              :     return false;
    4125              : 
    4126         1476 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    4127            0 :       && cost_vec)
    4128              :     return false;
    4129              : 
    4130         1476 :   if (gimple_call_lhs (stmt)
    4131         1476 :       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    4132              :     return false;
    4133              : 
    4134         1476 :   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
    4135              : 
    4136         1476 :   vectype = SLP_TREE_VECTYPE (slp_node);
    4137              : 
    4138      2657064 :   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    4139              :     return false;
    4140              : 
    4141              :   /* Process function arguments.  */
    4142         1476 :   nargs = gimple_call_num_args (stmt) - masked_call_offset;
    4143              : 
    4144              :   /* Bail out if the function has zero arguments.  */
    4145         1476 :   if (nargs == 0)
    4146              :     return false;
    4147              : 
    4148         1412 :   vect_simd_clone_data _data;
    4149         1412 :   vect_simd_clone_data &data = slp_node->get_data (_data);
    4150         1412 :   vec<tree>& simd_clone_info = data.simd_clone_info;
    4151         1412 :   arginfo.reserve (nargs, true);
    4152         1412 :   auto_vec<slp_tree> slp_op;
    4153         1412 :   slp_op.safe_grow_cleared (nargs);
    4154              : 
    4155         4053 :   for (i = 0; i < nargs; i++)
    4156              :     {
    4157         2641 :       simd_call_arg_info thisarginfo;
    4158         2641 :       affine_iv iv;
    4159         2641 :       tree op;
    4160              : 
    4161         2641 :       thisarginfo.linear_step = 0;
    4162         2641 :       thisarginfo.align = 0;
    4163         2641 :       thisarginfo.op = NULL_TREE;
    4164         2641 :       thisarginfo.simd_lane_linear = false;
    4165              : 
    4166         5282 :       int op_no = vect_slp_child_index_for_operand (stmt_info,
    4167         2641 :                                                     i + masked_call_offset);
    4168         5282 :       if (!vect_is_simple_use (vinfo, slp_node,
    4169         2641 :                                op_no, &op, &slp_op[i],
    4170              :                                &thisarginfo.dt, &thisarginfo.vectype)
    4171         2641 :           || thisarginfo.dt == vect_uninitialized_def)
    4172              :         {
    4173            0 :           if (dump_enabled_p ())
    4174            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    4175              :                              "use not simple.\n");
    4176            0 :           return false;
    4177              :         }
    4178              : 
    4179         2641 :       if (thisarginfo.dt == vect_constant_def
    4180         2641 :           || thisarginfo.dt == vect_external_def)
    4181              :         {
    4182              :           /* With SLP we determine the vector type of constants/externals
    4183              :              at analysis time, handling conflicts via
    4184              :              vect_maybe_update_slp_op_vectype.  At transform time
    4185              :              we have a vector type recorded for SLP.  */
    4186          680 :           gcc_assert (cost_vec
    4187              :                       || thisarginfo.vectype != NULL_TREE);
    4188              :           if (cost_vec)
    4189          549 :             thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
    4190          549 :                                                                TREE_TYPE (op),
    4191              :                                                                slp_node);
    4192              :         }
    4193              :       else
    4194         1961 :         gcc_assert (thisarginfo.vectype != NULL_TREE);
    4195              : 
    4196              :       /* For linear arguments, the analyze phase should have saved
    4197              :          the base and step.  */
    4198         2510 :       if (!cost_vec
    4199         1594 :           && i * 3 + 4 <= simd_clone_info.length ()
    4200         2720 :           && simd_clone_info[i * 3 + 2])
    4201              :         {
    4202          118 :           thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
    4203          118 :           thisarginfo.op = simd_clone_info[i * 3 + 1];
    4204          118 :           thisarginfo.simd_lane_linear
    4205          118 :             = (simd_clone_info[i * 3 + 3] == boolean_true_node);
    4206              :           /* If loop has been peeled for alignment, we need to adjust it.  */
    4207          118 :           tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
    4208          118 :           tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
    4209          118 :           if (n1 != n2 && !thisarginfo.simd_lane_linear)
    4210              :             {
    4211            0 :               tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
    4212            0 :               tree step = simd_clone_info[i * 3 + 2];
    4213            0 :               tree opt = TREE_TYPE (thisarginfo.op);
    4214            0 :               bias = fold_convert (TREE_TYPE (step), bias);
    4215            0 :               bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
    4216            0 :               thisarginfo.op
    4217            0 :                 = fold_build2 (POINTER_TYPE_P (opt)
    4218              :                                ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
    4219              :                                thisarginfo.op, bias);
    4220              :             }
    4221              :         }
    4222         2523 :       else if (cost_vec
    4223         1844 :                && thisarginfo.dt != vect_constant_def
    4224         1717 :                && thisarginfo.dt != vect_external_def
    4225         1295 :                && loop_vinfo
    4226         1290 :                && SLP_TREE_LANES (slp_node) == 1
    4227         1266 :                && TREE_CODE (op) == SSA_NAME
    4228         2532 :                && simple_iv (loop, loop_containing_stmt (stmt), op,
    4229              :                              &iv, false)
    4230         2735 :                && tree_fits_shwi_p (iv.step))
    4231              :         {
    4232          212 :           thisarginfo.linear_step = tree_to_shwi (iv.step);
    4233          212 :           thisarginfo.op = iv.base;
    4234              :         }
    4235         2311 :       else if ((thisarginfo.dt == vect_constant_def
    4236         2311 :                 || thisarginfo.dt == vect_external_def)
    4237          680 :                && SLP_TREE_LANES (slp_node) == 1
    4238         2617 :                && POINTER_TYPE_P (TREE_TYPE (op)))
    4239           86 :         thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
    4240              :       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
    4241              :          linear too.  */
    4242         2641 :       if (SLP_TREE_LANES (slp_node) == 1
    4243         2221 :           && POINTER_TYPE_P (TREE_TYPE (op))
    4244          196 :           && !thisarginfo.linear_step
    4245          112 :           && cost_vec
    4246           58 :           && thisarginfo.dt != vect_constant_def
    4247           58 :           && thisarginfo.dt != vect_external_def
    4248           15 :           && loop_vinfo
    4249         2656 :           && TREE_CODE (op) == SSA_NAME)
    4250           15 :         vect_simd_lane_linear (op, loop, &thisarginfo);
    4251              : 
    4252         2641 :       if (!vectype)
    4253           12 :         vectype = thisarginfo.vectype;
    4254         2641 :       arginfo.quick_push (thisarginfo);
    4255              :     }
    4256              : 
    4257         1412 :   poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
    4258         1412 :   unsigned group_size = SLP_TREE_LANES (slp_node);
    4259         1412 :   unsigned int badness = 0;
    4260         1412 :   unsigned int badness_inbranch = 0;
    4261         1412 :   struct cgraph_node *bestn = NULL;
    4262         1412 :   struct cgraph_node *bestn_inbranch = NULL;
    4263         1412 :   if (!cost_vec)
    4264          362 :     bestn = ((loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    4265          362 :              ? data.clone_inbranch : data.clone);
    4266              :   else
    4267         6076 :     for (struct cgraph_node *n = node->simd_clones; n != NULL;
    4268         5026 :          n = n->simdclone->next_clone)
    4269              :       {
    4270         5026 :         unsigned int this_badness = 0;
    4271         5026 :         unsigned int num_calls;
    4272              :         /* The number of arguments in the call and the number of parameters in
    4273              :            the simdclone should match.  However, when the simdclone is
    4274              :            'inbranch', it could have one more parameter than nargs when using
    4275              :            an inbranch simdclone to call a non-inbranch call, either in a
    4276              :            non-masked loop using an all-true constant mask, or inside a masked
    4277              :            loop using its mask.  */
    4278         5026 :         size_t simd_nargs = n->simdclone->nargs;
    4279         5026 :         if (!masked_call_offset && n->simdclone->inbranch)
    4280         2271 :           simd_nargs--;
    4281         5026 :         if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
    4282              :                                   &num_calls)
    4283         1974 :             || (!n->simdclone->inbranch && (masked_call_offset > 0))
    4284         1790 :             || (nargs != simd_nargs))
    4285         3236 :           continue;
    4286         1790 :         if (num_calls != 1)
    4287         1142 :           this_badness += floor_log2 (num_calls) * 4096;
    4288         1790 :         if (n->simdclone->inbranch)
    4289          771 :           this_badness += 8192;
    4290              : 
    4291              :         /* If SLP_TREE_VECTYPE has not been set yet pass the general vector
    4292              :            mode, which for targets that use it will determine what ISA we can
    4293              :            vectorize this code with.  */
    4294         1790 :         machine_mode vector_mode = vinfo->vector_mode;
    4295         1790 :         if (vectype)
    4296         1790 :           vector_mode = TYPE_MODE (vectype);
    4297         1790 :         int target_badness = targetm.simd_clone.usable (n, vector_mode);
    4298         1790 :         if (target_badness < 0)
    4299          368 :           continue;
    4300         1422 :         this_badness += target_badness * 512;
    4301         4192 :         for (i = 0; i < nargs; i++)
    4302              :           {
    4303         3018 :             switch (n->simdclone->args[i].arg_type)
    4304              :               {
    4305         2088 :               case SIMD_CLONE_ARG_TYPE_VECTOR:
    4306         2088 :                 if (VECTOR_BOOLEAN_TYPE_P (n->simdclone->args[i].vector_type))
    4307              :                   /* Vector mask arguments are not supported.  */
    4308              :                   i = -1;
    4309         2080 :                 else if (!useless_type_conversion_p
    4310         2080 :                          (n->simdclone->args[i].orig_type,
    4311         2080 :                           TREE_TYPE (gimple_call_arg (stmt,
    4312              :                                                       i + masked_call_offset))))
    4313              :                   i = -1;
    4314         2080 :                 else if (arginfo[i].dt == vect_constant_def
    4315         1973 :                          || arginfo[i].dt == vect_external_def
    4316         3989 :                          || arginfo[i].linear_step)
    4317          399 :                   this_badness += 64;
    4318              :                 break;
    4319          310 :               case SIMD_CLONE_ARG_TYPE_UNIFORM:
    4320          310 :                 if ((arginfo[i].dt != vect_constant_def
    4321          145 :                      && arginfo[i].dt != vect_external_def)
    4322          410 :                     || SLP_TREE_LANES (slp_node) != 1)
    4323              :                   i = -1;
    4324              :                 break;
    4325          324 :               case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
    4326          324 :               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
    4327          324 :                 if (arginfo[i].dt == vect_constant_def
    4328          324 :                     || arginfo[i].dt == vect_external_def
    4329          324 :                     || (arginfo[i].linear_step
    4330          324 :                         != n->simdclone->args[i].linear_step))
    4331              :                   i = -1;
    4332              :                 break;
    4333              :               case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
    4334              :               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
    4335              :               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
    4336              :               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
    4337              :               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
    4338              :               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
    4339              :                 /* FORNOW */
    4340              :                 i = -1;
    4341              :                 break;
    4342          296 :               case SIMD_CLONE_ARG_TYPE_MASK:
    4343          296 :                 if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
    4344          264 :                     && n->simdclone->mask_mode != VOIDmode)
    4345              :                   i = -1;
    4346              :                 /* While we can create a traditional data vector from
    4347              :                    an incoming integer mode mask we have no good way to
    4348              :                    force generate an integer mode mask from a traditional
    4349              :                    boolean vector input.  */
    4350          296 :                 else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
    4351          296 :                          && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
    4352              :                   i = -1;
    4353          290 :                 else if (n->simdclone->mask_mode == VOIDmode
    4354              :                          /* FORNOW we only have partial support for vector-type
    4355              :                             masks that can't hold all of simdlen.  */
    4356          554 :                          && (maybe_ne (TYPE_VECTOR_SUBPARTS (n->simdclone->args[i].vector_type),
    4357          264 :                                        TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
    4358              :                              /* Verify we can compute the mask argument.  */
    4359          111 :                              || !expand_vec_cond_expr_p (n->simdclone->args[i].vector_type,
    4360          111 :                                                          arginfo[i].vectype)))
    4361              :                   i = -1;
    4362          125 :                 else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
    4363              :                          /* FORNOW we only have partial support for
    4364              :                             integer-type masks that represent the same number
    4365              :                             of lanes as the vectorized mask inputs.  */
    4366          151 :                          && maybe_ne (exact_div (n->simdclone->simdlen,
    4367              :                                                  n->simdclone->args[i].linear_step),
    4368           26 :                                       TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
    4369              :                   i = -1;
    4370          107 :                 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
    4371          107 :                          && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
    4372            8 :                   this_badness += 2048;
    4373              :                 break;
    4374              :               }
    4375          183 :             if (i == (size_t) -1)
    4376              :               break;
    4377         2770 :             if (n->simdclone->args[i].alignment > arginfo[i].align)
    4378              :               {
    4379              :                 i = -1;
    4380              :                 break;
    4381              :               }
    4382         2770 :             if (arginfo[i].align)
    4383          110 :               this_badness += (exact_log2 (arginfo[i].align)
    4384          160 :                                - exact_log2 (n->simdclone->args[i].alignment));
    4385              :           }
    4386         1422 :         if (i == (size_t) -1)
    4387          248 :           continue;
    4388         1174 :         if (masked_call_offset == 0
    4389         1067 :             && n->simdclone->inbranch
    4390          347 :             && n->simdclone->nargs > nargs)
    4391              :           {
    4392          347 :             gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
    4393              :                         SIMD_CLONE_ARG_TYPE_MASK);
    4394              :             /* Penalize using a masked SIMD clone in a non-masked loop, that is
    4395              :                not in a branch, as we'd have to construct an all-true mask.  */
    4396          347 :             this_badness += 64;
    4397              :           }
    4398         1174 :         if (bestn == NULL || this_badness < badness)
    4399              :           {
    4400          817 :             bestn = n;
    4401          817 :             badness = this_badness;
    4402              :           }
    4403         1174 :         if (n->simdclone->inbranch
    4404          454 :             && (bestn_inbranch == NULL || this_badness < badness_inbranch))
    4405              :           {
    4406         5026 :             bestn_inbranch = n;
    4407         5026 :             badness_inbranch = this_badness;
    4408              :           }
    4409              :       }
    4410              : 
    4411         1412 :   if (bestn == NULL)
    4412              :     return false;
    4413              : 
    4414          829 :   fndecl = bestn->decl;
    4415          829 :   nunits = bestn->simdclone->simdlen;
    4416          829 :   int ncopies = vector_unroll_factor (vf * group_size, nunits);
    4417              : 
    4418              :   /* If the function isn't const, only allow it in simd loops where user
    4419              :      has asserted that at least nunits consecutive iterations can be
    4420              :      performed using SIMD instructions.  */
    4421          824 :   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
    4422         1006 :       && gimple_vuse (stmt))
    4423              :     return false;
    4424              : 
    4425              :   /* ncopies is the number of SIMD clone calls we create.  Since simdlen
    4426              :      does not necessarily match nunits of the vector types used, track the
    4427              :      number of vectors of VECTYPE in ncopies_in.  */
    4428          829 :   int ncopies_in = vect_get_num_vectors (vf * group_size, vectype);
    4429              : 
    4430              :   /* Sanity check: make sure that at least one copy of the vectorized stmt
    4431              :      needs to be generated.  */
    4432          829 :   gcc_assert (ncopies >= 1);
    4433              : 
    4434          829 :   if (cost_vec) /* transformation not required.  */
    4435              :     {
    4436         1514 :       for (unsigned i = 0; i < nargs; ++i)
    4437         1047 :         if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
    4438              :           {
    4439            0 :             if (dump_enabled_p ())
    4440            0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    4441              :                                "incompatible vector types for invariants\n");
    4442            0 :             return false;
    4443              :           }
    4444              : 
    4445          467 :       if (!bestn_inbranch && loop_vinfo)
    4446              :         {
    4447          248 :           if (dump_enabled_p ()
    4448          248 :               && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
    4449          171 :             dump_printf_loc (MSG_NOTE, vect_location,
    4450              :                              "can't use a fully-masked loop because no"
    4451              :                              " masked simd clone was available.\n");
    4452          248 :           LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    4453              :         }
    4454              : 
    4455              :       /* When the original call is pure or const but the SIMD ABI dictates
    4456              :          an aggregate return we will have to use a virtual definition and
    4457              :          in a loop eventually even need to add a virtual PHI.  That's
    4458              :          not straight-forward so allow to fix this up via renaming.  */
    4459          467 :       if (gimple_call_lhs (stmt)
    4460          461 :           && !gimple_vdef (stmt)
    4461          832 :           && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
    4462           27 :         vinfo->any_known_not_updated_vssa = true;
    4463              :       /* ???  For SLP code-gen we end up inserting after the last
    4464              :          vector argument def rather than at the original call position
    4465              :          so automagic virtual operand updating doesn't work.  */
    4466          934 :       if (gimple_vuse (stmt))
    4467          139 :         vinfo->any_known_not_updated_vssa = true;
    4468              : 
    4469          467 :       data.clone = bestn;
    4470          467 :       data.clone_inbranch = bestn_inbranch;
    4471              : 
    4472          467 :       simd_clone_info.safe_push (NULL_TREE);
    4473         1663 :       for (i = 0;
    4474         2502 :            i < (bestn_inbranch ? bestn_inbranch : bestn)->simdclone->nargs; i++)
    4475              :         {
    4476         1196 :           if (loop_vinfo
    4477         1190 :               && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
    4478          482 :               && (bestn_inbranch->simdclone->args[i].arg_type
    4479              :                   == SIMD_CLONE_ARG_TYPE_MASK))
    4480              :             {
    4481          174 :               if (masked_call_offset)
    4482              :                 /* When there is an explicit mask we require the
    4483              :                    number of elements to match up.  */
    4484           49 :                 vect_record_loop_mask (loop_vinfo,
    4485              :                                        &LOOP_VINFO_MASKS (loop_vinfo),
    4486              :                                        ncopies_in, vectype, NULL_TREE);
    4487              :               else
    4488              :                 {
    4489              :                   /* When there is no explicit mask on the call we have
    4490              :                      more relaxed requirements.  */
    4491          125 :                   tree masktype;
    4492          125 :                   poly_uint64 callee_nelements;
    4493          125 :                   if (SCALAR_INT_MODE_P (bestn_inbranch->simdclone->mask_mode))
    4494              :                     {
    4495           12 :                       callee_nelements
    4496           12 :                           = exact_div (bestn_inbranch->simdclone->simdlen,
    4497              :                                        bestn_inbranch->simdclone->args[i].linear_step);
    4498           12 :                       masktype = get_related_vectype_for_scalar_type
    4499           12 :                           (vinfo->vector_mode, TREE_TYPE (vectype),
    4500              :                            callee_nelements);
    4501              :                     }
    4502              :                   else
    4503              :                     {
    4504          113 :                       masktype = bestn_inbranch->simdclone->args[i].vector_type;
    4505              :                       /* The aarch64 port will add custom attributes to types
    4506              :                          for SVE simdclones which make the types different.  We
    4507              :                          should use canonical types for masks within the
    4508              :                          vectorizer, hence we construct the related vectype
    4509              :                          here.  */
    4510          113 :                       masktype
    4511              :                         = build_truth_vector_type_for_mode
    4512          113 :                           (TYPE_VECTOR_SUBPARTS (masktype),
    4513          113 :                            TYPE_MODE (masktype));
    4514          113 :                       callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
    4515              :                     }
    4516          125 :                   auto o = vector_unroll_factor (nunits, callee_nelements);
    4517          125 :                   vect_record_loop_mask (loop_vinfo,
    4518              :                                          &LOOP_VINFO_MASKS (loop_vinfo),
    4519              :                                          ncopies  * o, masktype, NULL_TREE);
    4520              :                 }
    4521              :             }
    4522         1022 :           else if ((bestn->simdclone->args[i].arg_type
    4523              :                     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
    4524          915 :                    || (bestn->simdclone->args[i].arg_type
    4525              :                        == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
    4526          904 :                    || (bestn_inbranch
    4527          364 :                        && ((bestn_inbranch->simdclone->args[i].arg_type
    4528              :                             == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
    4529          364 :                            || (bestn_inbranch->simdclone->args[i].arg_type
    4530              :                                == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))))
    4531              :             {
    4532          118 :               simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
    4533          118 :               simd_clone_info.safe_push (arginfo[i].op);
    4534          202 :               tree lst = (POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
    4535          202 :                           ? size_type_node : TREE_TYPE (arginfo[i].op));
    4536          118 :               tree ls = build_int_cst (lst, arginfo[i].linear_step);
    4537          118 :               simd_clone_info.safe_push (ls);
    4538          118 :               tree sll = (arginfo[i].simd_lane_linear
    4539          118 :                           ? boolean_true_node : boolean_false_node);
    4540          118 :               simd_clone_info.safe_push (sll);
    4541              :             }
    4542              :         }
    4543              : 
    4544          467 :       SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
    4545          467 :       slp_node->data = new vect_simd_clone_data (std::move (_data));
    4546          467 :       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
    4547              :       /* ???  We're confused by calls w/o LHS.  */
    4548          467 :       if (SLP_TREE_VECTYPE (slp_node))
    4549          461 :         vect_model_simple_cost (vinfo, ncopies, slp_node, cost_vec);
    4550          467 :       return true;
    4551              :     }
    4552              : 
    4553              :   /* Transform.  */
    4554              : 
    4555          362 :   if (dump_enabled_p ())
    4556          246 :     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
    4557              : 
    4558              :   /* Handle def.  */
    4559          362 :   scalar_dest = gimple_call_lhs (stmt);
    4560          362 :   vec_dest = NULL_TREE;
    4561          362 :   rtype = NULL_TREE;
    4562          362 :   ratype = NULL_TREE;
    4563          362 :   if (scalar_dest)
    4564              :     {
    4565          356 :       vec_dest = vect_create_destination_var (scalar_dest, vectype);
    4566          356 :       rtype = TREE_TYPE (TREE_TYPE (fndecl));
    4567          356 :       if (TREE_CODE (rtype) == ARRAY_TYPE)
    4568              :         {
    4569            9 :           ratype = rtype;
    4570            9 :           rtype = TREE_TYPE (ratype);
    4571              :         }
    4572              :     }
    4573              : 
    4574          724 :   auto_vec<vec<tree> > vec_oprnds;
    4575          362 :   auto_vec<unsigned> vec_oprnds_i;
    4576          362 :   vec_oprnds_i.safe_grow_cleared (nargs, true);
    4577          362 :   vec_oprnds.reserve_exact (nargs);
    4578          362 :   vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
    4579          833 :   for (j = 0; j < ncopies; ++j)
    4580              :     {
    4581          471 :       poly_uint64 callee_nelements;
    4582          471 :       poly_uint64 caller_nelements;
    4583              :       /* Build argument list for the vectorized call.  */
    4584          471 :       if (j == 0)
    4585          362 :         vargs.create (nargs);
    4586              :       else
    4587          109 :         vargs.truncate (0);
    4588              : 
    4589         1580 :       for (i = 0; i < nargs; i++)
    4590              :         {
    4591         1109 :           unsigned int k, l, m, o;
    4592         1109 :           tree atype;
    4593         1109 :           tree op = gimple_call_arg (stmt, i + masked_call_offset);
    4594         1109 :           switch (bestn->simdclone->args[i].arg_type)
    4595              :             {
    4596          820 :             case SIMD_CLONE_ARG_TYPE_VECTOR:
    4597          820 :               atype = bestn->simdclone->args[i].vector_type;
    4598          820 :               caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
    4599          820 :               callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
    4600          820 :               o = vector_unroll_factor (nunits, callee_nelements);
    4601         1870 :               for (m = j * o; m < (j + 1) * o; m++)
    4602              :                 {
    4603         1050 :                   if (known_lt (callee_nelements, caller_nelements))
    4604              :                     {
    4605          516 :                       poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
    4606          258 :                       if (!constant_multiple_p (caller_nelements,
    4607              :                                                 callee_nelements, &k))
    4608            0 :                         gcc_unreachable ();
    4609              : 
    4610          258 :                       gcc_assert ((k & (k - 1)) == 0);
    4611          258 :                       if (m == 0)
    4612              :                         {
    4613           57 :                           vec_oprnds_i[i] = 0;
    4614           57 :                           vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4615              :                         }
    4616              :                       else
    4617              :                         {
    4618          201 :                           vec_oprnd0 = arginfo[i].op;
    4619          201 :                           if ((m & (k - 1)) == 0)
    4620           72 :                             vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4621              :                         }
    4622          258 :                       arginfo[i].op = vec_oprnd0;
    4623          258 :                       vec_oprnd0
    4624          258 :                         = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
    4625          258 :                                   bitsize_int (prec),
    4626          258 :                                   bitsize_int ((m & (k - 1)) * prec));
    4627          258 :                       gassign *new_stmt
    4628          258 :                         = gimple_build_assign (make_ssa_name (atype),
    4629              :                                                vec_oprnd0);
    4630          258 :                       vect_finish_stmt_generation (vinfo, stmt_info,
    4631              :                                                    new_stmt, gsi);
    4632          258 :                       vargs.safe_push (gimple_assign_lhs (new_stmt));
    4633              :                     }
    4634              :                   else
    4635              :                     {
    4636          792 :                       if (!constant_multiple_p (callee_nelements,
    4637              :                                                 caller_nelements, &k))
    4638            0 :                         gcc_unreachable ();
    4639          792 :                       gcc_assert ((k & (k - 1)) == 0);
    4640          792 :                       vec<constructor_elt, va_gc> *ctor_elts;
    4641          792 :                       if (k != 1)
    4642           14 :                         vec_alloc (ctor_elts, k);
    4643              :                       else
    4644          778 :                         ctor_elts = NULL;
    4645          820 :                       for (l = 0; l < k; l++)
    4646              :                         {
    4647          806 :                           if (m == 0 && l == 0)
    4648              :                             {
    4649          454 :                               vec_oprnds_i[i] = 0;
    4650          454 :                               vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4651              :                             }
    4652              :                           else
    4653          352 :                             vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4654          806 :                           arginfo[i].op = vec_oprnd0;
    4655          806 :                           if (k == 1)
    4656              :                             break;
    4657           28 :                           CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
    4658              :                                                   vec_oprnd0);
    4659              :                         }
    4660          792 :                       if (k == 1)
    4661          778 :                         if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
    4662              :                                                        atype))
    4663              :                           {
    4664            0 :                             vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
    4665              :                                                  vec_oprnd0);
    4666            0 :                             gassign *new_stmt
    4667            0 :                               = gimple_build_assign (make_ssa_name (atype),
    4668              :                                                      vec_oprnd0);
    4669            0 :                             vect_finish_stmt_generation (vinfo, stmt_info,
    4670              :                                                          new_stmt, gsi);
    4671            0 :                             vargs.safe_push (gimple_get_lhs (new_stmt));
    4672              :                           }
    4673              :                         else
    4674          778 :                           vargs.safe_push (vec_oprnd0);
    4675              :                       else
    4676              :                         {
    4677           14 :                           vec_oprnd0 = build_constructor (atype, ctor_elts);
    4678           14 :                           gassign *new_stmt
    4679           14 :                             = gimple_build_assign (make_ssa_name (atype),
    4680              :                                                    vec_oprnd0);
    4681           14 :                           vect_finish_stmt_generation (vinfo, stmt_info,
    4682              :                                                        new_stmt, gsi);
    4683           14 :                           vargs.safe_push (gimple_assign_lhs (new_stmt));
    4684              :                         }
    4685              :                     }
    4686              :                 }
    4687              :               break;
    4688           66 :             case SIMD_CLONE_ARG_TYPE_MASK:
    4689           66 :               if (bestn->simdclone->mask_mode == VOIDmode)
    4690              :                 {
    4691           60 :                   atype = bestn->simdclone->args[i].vector_type;
    4692           60 :                   tree elt_type = TREE_TYPE (atype);
    4693           60 :                   tree one = fold_convert (elt_type, integer_one_node);
    4694           60 :                   tree zero = fold_convert (elt_type, integer_zero_node);
    4695           60 :                   callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
    4696           60 :                   caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
    4697           60 :                   o = vector_unroll_factor (nunits, callee_nelements);
    4698          120 :                   for (m = j * o; m < (j + 1) * o; m++)
    4699              :                     {
    4700           60 :                       if (maybe_lt (callee_nelements, caller_nelements))
    4701              :                         {
    4702              :                           /* The mask type has fewer elements than simdlen.  */
    4703              : 
    4704              :                           /* FORNOW */
    4705            0 :                           gcc_unreachable ();
    4706              :                         }
    4707           60 :                       else if (known_eq (callee_nelements, caller_nelements))
    4708              :                         {
    4709              :                           /* The SIMD clone function has the same number of
    4710              :                              elements as the current function.  */
    4711           60 :                           if (m == 0)
    4712           60 :                             vec_oprnds_i[i] = 0;
    4713           60 :                           vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4714           60 :                           if (loop_vinfo
    4715           60 :                               && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    4716              :                             {
    4717            0 :                               vec_loop_masks *loop_masks
    4718              :                                 = &LOOP_VINFO_MASKS (loop_vinfo);
    4719            0 :                               tree loop_mask
    4720            0 :                                 = vect_get_loop_mask (loop_vinfo, gsi,
    4721              :                                                       loop_masks, ncopies_in,
    4722            0 :                                                       vectype, j);
    4723            0 :                               vec_oprnd0
    4724            0 :                                 = prepare_vec_mask (loop_vinfo,
    4725            0 :                                                     TREE_TYPE (loop_mask),
    4726              :                                                     loop_mask, vec_oprnd0,
    4727              :                                                     gsi);
    4728            0 :                               loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
    4729              :                                                                      loop_mask });
    4730              : 
    4731              :                             }
    4732           60 :                           vec_oprnd0
    4733           60 :                             = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
    4734              :                                       build_vector_from_val (atype, one),
    4735              :                                       build_vector_from_val (atype, zero));
    4736           60 :                           gassign *new_stmt
    4737           60 :                             = gimple_build_assign (make_ssa_name (atype),
    4738              :                                                    vec_oprnd0);
    4739           60 :                           vect_finish_stmt_generation (vinfo, stmt_info,
    4740              :                                                        new_stmt, gsi);
    4741           60 :                           vargs.safe_push (gimple_assign_lhs (new_stmt));
    4742              :                         }
    4743              :                       else
    4744              :                         {
    4745              :                           /* The mask type has more elements than simdlen.  */
    4746              : 
    4747              :                           /* FORNOW */
    4748            0 :                           gcc_unreachable ();
    4749              :                         }
    4750              :                     }
    4751              :                 }
    4752            6 :               else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
    4753              :                 {
    4754            6 :                   atype = bestn->simdclone->args[i].vector_type;
    4755            6 :                   poly_uint64 atype_subparts
    4756            6 :                     = exact_div (bestn->simdclone->simdlen,
    4757              :                                  bestn->simdclone->args[i].linear_step);
    4758            6 :                   o = bestn->simdclone->args[i].linear_step;
    4759           12 :                   for (m = j * o; m < (j + 1) * o; m++)
    4760              :                     {
    4761            6 :                       if (m == 0)
    4762            6 :                         vec_oprnds_i[i] = 0;
    4763            6 :                       if (maybe_lt (atype_subparts,
    4764            6 :                                     TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
    4765              :                         {
    4766              :                           /* The mask argument has fewer elements than the
    4767              :                              input vector.  */
    4768              :                           /* FORNOW */
    4769            0 :                           gcc_unreachable ();
    4770              :                         }
    4771            6 :                       else if (known_eq (atype_subparts,
    4772              :                                          TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
    4773              :                         {
    4774            6 :                           vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
    4775            6 :                           if (loop_vinfo
    4776            6 :                               && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    4777              :                             {
    4778            1 :                               vec_loop_masks *loop_masks
    4779              :                                 = &LOOP_VINFO_MASKS (loop_vinfo);
    4780            1 :                               tree loop_mask
    4781            1 :                                 = vect_get_loop_mask (loop_vinfo, gsi,
    4782              :                                                       loop_masks, ncopies_in,
    4783              :                                                       vectype, j);
    4784            1 :                               vec_oprnd0
    4785            1 :                                 = prepare_vec_mask (loop_vinfo,
    4786            1 :                                                     TREE_TYPE (loop_mask),
    4787              :                                                     loop_mask, vec_oprnd0,
    4788              :                                                     gsi);
    4789              :                             }
    4790              :                           /* The vector mask argument matches the input
    4791              :                              in the number of lanes, but not necessarily
    4792              :                              in the mode.  */
    4793            6 :                           tree st = lang_hooks.types.type_for_mode
    4794            6 :                                       (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
    4795            6 :                           vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
    4796              :                                                vec_oprnd0);
    4797            6 :                           gassign *new_stmt
    4798            6 :                             = gimple_build_assign (make_ssa_name (st),
    4799              :                                                    vec_oprnd0);
    4800            6 :                           vect_finish_stmt_generation (vinfo, stmt_info,
    4801              :                                                        new_stmt, gsi);
    4802            6 :                           if (!types_compatible_p (atype, st))
    4803              :                             {
    4804            6 :                               new_stmt
    4805            6 :                                 = gimple_build_assign (make_ssa_name (atype),
    4806              :                                                        NOP_EXPR,
    4807              :                                                        gimple_assign_lhs
    4808              :                                                          (new_stmt));
    4809            6 :                               vect_finish_stmt_generation (vinfo, stmt_info,
    4810              :                                                            new_stmt, gsi);
    4811              :                             }
    4812            6 :                           vargs.safe_push (gimple_assign_lhs (new_stmt));
    4813              :                         }
    4814              :                       else
    4815              :                         {
    4816              :                           /* The mask argument has more elements than the
    4817              :                              input vector.  */
    4818              :                           /* FORNOW */
    4819            0 :                           gcc_unreachable ();
    4820              :                         }
    4821              :                     }
    4822              :                 }
    4823              :               else
    4824            0 :                 gcc_unreachable ();
    4825              :               break;
    4826          102 :             case SIMD_CLONE_ARG_TYPE_UNIFORM:
    4827          102 :               vargs.safe_push (op);
    4828          102 :               break;
    4829          121 :             case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
    4830          121 :             case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
    4831          121 :               if (j == 0)
    4832              :                 {
    4833          118 :                   gimple_seq stmts;
    4834          118 :                   arginfo[i].op
    4835          118 :                     = force_gimple_operand (unshare_expr (arginfo[i].op),
    4836              :                                             &stmts, true, NULL_TREE);
    4837          118 :                   if (stmts != NULL)
    4838              :                     {
    4839            0 :                       basic_block new_bb;
    4840            0 :                       edge pe = loop_preheader_edge (loop);
    4841            0 :                       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
    4842            0 :                       gcc_assert (!new_bb);
    4843              :                     }
    4844          118 :                   if (arginfo[i].simd_lane_linear)
    4845              :                     {
    4846            6 :                       vargs.safe_push (arginfo[i].op);
    4847            6 :                       break;
    4848              :                     }
    4849          112 :                   tree phi_res = copy_ssa_name (op);
    4850          112 :                   gphi *new_phi = create_phi_node (phi_res, loop->header);
    4851          112 :                   add_phi_arg (new_phi, arginfo[i].op,
    4852              :                                loop_preheader_edge (loop), UNKNOWN_LOCATION);
    4853          112 :                   enum tree_code code
    4854          196 :                     = POINTER_TYPE_P (TREE_TYPE (op))
    4855          112 :                       ? POINTER_PLUS_EXPR : PLUS_EXPR;
    4856          196 :                   tree type = POINTER_TYPE_P (TREE_TYPE (op))
    4857          196 :                               ? sizetype : TREE_TYPE (op);
    4858          112 :                   poly_widest_int cst
    4859          112 :                     = wi::mul (bestn->simdclone->args[i].linear_step,
    4860          112 :                                ncopies * nunits);
    4861          112 :                   tree tcst = wide_int_to_tree (type, cst);
    4862          112 :                   tree phi_arg = copy_ssa_name (op);
    4863          112 :                   gassign *new_stmt
    4864          112 :                     = gimple_build_assign (phi_arg, code, phi_res, tcst);
    4865          112 :                   gimple_stmt_iterator si = gsi_after_labels (loop->header);
    4866          112 :                   gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
    4867          112 :                   add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
    4868              :                                UNKNOWN_LOCATION);
    4869          112 :                   arginfo[i].op = phi_res;
    4870          112 :                   vargs.safe_push (phi_res);
    4871          112 :                 }
    4872              :               else
    4873              :                 {
    4874            3 :                   enum tree_code code
    4875            6 :                     = POINTER_TYPE_P (TREE_TYPE (op))
    4876            3 :                       ? POINTER_PLUS_EXPR : PLUS_EXPR;
    4877            6 :                   tree type = POINTER_TYPE_P (TREE_TYPE (op))
    4878            6 :                               ? sizetype : TREE_TYPE (op);
    4879            3 :                   poly_widest_int cst
    4880            3 :                     = wi::mul (bestn->simdclone->args[i].linear_step,
    4881            3 :                                j * nunits);
    4882            3 :                   tree tcst = wide_int_to_tree (type, cst);
    4883            3 :                   new_temp = make_ssa_name (TREE_TYPE (op));
    4884            3 :                   gassign *new_stmt
    4885            6 :                     = gimple_build_assign (new_temp, code,
    4886            3 :                                            arginfo[i].op, tcst);
    4887            3 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    4888            3 :                   vargs.safe_push (new_temp);
    4889            3 :                 }
    4890              :               break;
    4891            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
    4892            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
    4893            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
    4894            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
    4895            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
    4896            0 :             case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
    4897            0 :             default:
    4898            0 :               gcc_unreachable ();
    4899              :             }
    4900              :         }
    4901              : 
    4902          471 :       if (masked_call_offset == 0
    4903          405 :           && bestn->simdclone->inbranch
    4904           13 :           && bestn->simdclone->nargs > nargs)
    4905              :         {
    4906           13 :           unsigned long m, o;
    4907           13 :           size_t mask_i = bestn->simdclone->nargs - 1;
    4908           13 :           tree mask;
    4909           13 :           gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
    4910              :                       SIMD_CLONE_ARG_TYPE_MASK);
    4911              : 
    4912           13 :           tree mask_argtype = bestn->simdclone->args[mask_i].vector_type;
    4913           13 :           tree mask_vectype;
    4914           13 :           if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
    4915              :             {
    4916            2 :               callee_nelements = exact_div (bestn->simdclone->simdlen,
    4917              :                                             bestn->simdclone->args[i].linear_step);
    4918            2 :               mask_vectype = get_related_vectype_for_scalar_type
    4919            2 :                   (vinfo->vector_mode, TREE_TYPE (vectype), callee_nelements);
    4920              :             }
    4921              :           else
    4922              :             {
    4923           11 :               mask_vectype = mask_argtype;
    4924           11 :               callee_nelements = TYPE_VECTOR_SUBPARTS (mask_vectype);
    4925              :             }
    4926           13 :           o = vector_unroll_factor (nunits, callee_nelements);
    4927           26 :           for (m = j * o; m < (j + 1) * o; m++)
    4928              :             {
    4929           13 :               if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    4930              :                 {
    4931            1 :                   vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
    4932            1 :                   mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
    4933              :                                              ncopies * o, mask_vectype, m);
    4934              :                 }
    4935              :               else
    4936           12 :                 mask = vect_build_all_ones_mask (vinfo, stmt_info,
    4937              :                                                  mask_argtype);
    4938              : 
    4939           13 :               gassign *new_stmt;
    4940           13 :               if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
    4941              :                 {
    4942              :                   /* This means we are dealing with integer mask modes.
    4943              :                      First convert to an integer type with the same size as
    4944              :                      the current vector type.  */
    4945            2 :                   unsigned HOST_WIDE_INT intermediate_size
    4946            2 :                       = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
    4947            2 :                   tree mid_int_type =
    4948            2 :                       build_nonstandard_integer_type (intermediate_size, 1);
    4949            2 :                   mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
    4950            2 :                   new_stmt
    4951            2 :                       = gimple_build_assign (make_ssa_name (mid_int_type),
    4952              :                                              mask);
    4953            2 :                   gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
    4954              :                   /* Then zero-extend to the mask mode.  */
    4955            2 :                   mask = fold_build1 (NOP_EXPR, mask_argtype,
    4956              :                                       gimple_get_lhs (new_stmt));
    4957              :                 }
    4958           11 :               else if (bestn->simdclone->mask_mode == VOIDmode)
    4959           11 :                 mask = build3 (VEC_COND_EXPR, mask_argtype, mask,
    4960              :                                build_one_cst (mask_argtype),
    4961              :                                build_zero_cst (mask_argtype));
    4962              :               else
    4963            0 :                 gcc_unreachable ();
    4964              : 
    4965           13 :               new_stmt = gimple_build_assign (make_ssa_name (mask_argtype),
    4966              :                                               mask);
    4967           13 :               vect_finish_stmt_generation (vinfo, stmt_info,
    4968              :                                            new_stmt, gsi);
    4969           13 :               mask = gimple_assign_lhs (new_stmt);
    4970           13 :               vargs.safe_push (mask);
    4971              :             }
    4972              :         }
    4973              : 
    4974          471 :       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
    4975          471 :       if (vec_dest)
    4976              :         {
    4977          465 :           gcc_assert (ratype
    4978              :                       || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
    4979          465 :           if (ratype)
    4980           15 :             new_temp = create_tmp_var (ratype);
    4981          450 :           else if (useless_type_conversion_p (vectype, rtype))
    4982          428 :             new_temp = make_ssa_name (vec_dest, new_call);
    4983              :           else
    4984           22 :             new_temp = make_ssa_name (rtype, new_call);
    4985          465 :           gimple_call_set_lhs (new_call, new_temp);
    4986              :         }
    4987          471 :       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
    4988          471 :       gimple *new_stmt = new_call;
    4989              : 
    4990          471 :       if (vec_dest)
    4991              :         {
    4992          465 :           if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
    4993              :             {
    4994           21 :               unsigned int k, l;
    4995           42 :               poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
    4996           42 :               poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
    4997           21 :               k = vector_unroll_factor (nunits,
    4998              :                                         TYPE_VECTOR_SUBPARTS (vectype));
    4999           21 :               gcc_assert ((k & (k - 1)) == 0);
    5000           75 :               for (l = 0; l < k; l++)
    5001              :                 {
    5002           54 :                   tree t;
    5003           54 :                   if (ratype)
    5004              :                     {
    5005           42 :                       t = build_fold_addr_expr (new_temp);
    5006           42 :                       t = build2 (MEM_REF, vectype, t,
    5007           42 :                                   build_int_cst (TREE_TYPE (t), l * bytes));
    5008              :                     }
    5009              :                   else
    5010           12 :                     t = build3 (BIT_FIELD_REF, vectype, new_temp,
    5011           12 :                                 bitsize_int (prec), bitsize_int (l * prec));
    5012           54 :                   new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
    5013           54 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5014              : 
    5015           54 :                   SLP_TREE_VEC_DEFS (slp_node)
    5016           54 :                     .quick_push (gimple_assign_lhs (new_stmt));
    5017              :                 }
    5018              : 
    5019           21 :               if (ratype)
    5020           15 :                 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
    5021           21 :               continue;
    5022           21 :             }
    5023          444 :           else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    5024              :             {
    5025           16 :               unsigned int k;
    5026           16 :               if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
    5027           16 :                                         TYPE_VECTOR_SUBPARTS (rtype), &k))
    5028            0 :                 gcc_unreachable ();
    5029           16 :               gcc_assert ((k & (k - 1)) == 0);
    5030           16 :               if ((j & (k - 1)) == 0)
    5031            8 :                 vec_alloc (ret_ctor_elts, k);
    5032           16 :               if (ratype)
    5033              :                 {
    5034            0 :                   unsigned int m, o;
    5035            0 :                   o = vector_unroll_factor (nunits,
    5036              :                                             TYPE_VECTOR_SUBPARTS (rtype));
    5037            0 :                   for (m = 0; m < o; m++)
    5038              :                     {
    5039            0 :                       tree tem = build4 (ARRAY_REF, rtype, new_temp,
    5040            0 :                                          size_int (m), NULL_TREE, NULL_TREE);
    5041            0 :                       new_stmt = gimple_build_assign (make_ssa_name (rtype),
    5042              :                                                       tem);
    5043            0 :                       vect_finish_stmt_generation (vinfo, stmt_info,
    5044              :                                                    new_stmt, gsi);
    5045            0 :                       CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
    5046              :                                               gimple_assign_lhs (new_stmt));
    5047              :                     }
    5048            0 :                   vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
    5049              :                 }
    5050              :               else
    5051           16 :                 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
    5052           16 :               if ((j & (k - 1)) != k - 1)
    5053            8 :                 continue;
    5054            8 :               vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
    5055            8 :               new_stmt
    5056            8 :                 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
    5057            8 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5058              : 
    5059            8 :               SLP_TREE_VEC_DEFS (slp_node)
    5060            8 :                 .quick_push (gimple_assign_lhs (new_stmt));
    5061            8 :               continue;
    5062            8 :             }
    5063          428 :           else if (ratype)
    5064              :             {
    5065            0 :               tree t = build_fold_addr_expr (new_temp);
    5066            0 :               t = build2 (MEM_REF, vectype, t,
    5067            0 :                           build_int_cst (TREE_TYPE (t), 0));
    5068            0 :               new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
    5069            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5070            0 :               vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
    5071              :             }
    5072          428 :           else if (!useless_type_conversion_p (vectype, rtype))
    5073              :             {
    5074            0 :               vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
    5075            0 :               new_stmt
    5076            0 :                 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
    5077            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5078              :             }
    5079              :         }
    5080              : 
    5081          434 :       if (gimple_get_lhs (new_stmt))
    5082          428 :         SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
    5083              :     }
    5084              : 
    5085         1159 :   for (i = 0; i < nargs; ++i)
    5086              :     {
    5087          797 :       vec<tree> oprndsi = vec_oprnds[i];
    5088          797 :       oprndsi.release ();
    5089              :     }
    5090          362 :   vargs.release ();
    5091              : 
    5092              :   /* Mark the clone as no longer being a candidate for GC.  */
    5093          362 :   bestn->gc_candidate = false;
    5094              : 
    5095          362 :   return true;
    5096         1412 : }
    5097              : 
    5098              : 
    5099              : /* Function vect_gen_widened_results_half
    5100              : 
    5101              :    Build a vector stmt with vect_gimple_build whose operation is CH, whose
    5102              :    result is a fresh SSA name created from VEC_DEST, and whose operands are
    5103              :    VEC_OPRND0 and VEC_OPRND1.  VEC_OPRND1 is replaced by NULL unless OP_TYPE
    5104              :    is binary_op.  The new stmt is emitted through
    5105              :    vect_finish_stmt_generation, which receives VINFO, STMT_INFO and GSI,
    5106              :    and is then returned to the caller.  */
    5107              : 
    5108              : static gimple *
    5109        31908 : vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
    5110              :                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
    5111              :                                tree vec_dest, gimple_stmt_iterator *gsi,
    5112              :                                stmt_vec_info stmt_info)
    5113              : {
    5114        31908 :   gimple *new_stmt;
    5115        31908 :   tree new_temp;
    5116              : 
    5117              :   /* Generate half of the widened result; non-binary ops drop VEC_OPRND1.  */
    5118        31908 :   if (op_type != binary_op)
    5119        30798 :     vec_oprnd1 = NULL;
    5120        31908 :   new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
    5121        31908 :   new_temp = make_ssa_name (vec_dest, new_stmt);
    5122        31908 :   gimple_set_lhs (new_stmt, new_temp);
    5123        31908 :   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5124              : 
    5125        31908 :   return new_stmt;
    5126              : }
    5127              : 
    5128              : 
    5129              : /* Create vectorized demotion statements: apply CODE to consecutive pairs of
    5130              :    VEC_OPRNDS, using the destination popped from VEC_DSTS (pushed back on exit).
    5131              :    For multi-step conversions store the results in VEC_OPRNDS and recurse with
    5132              :    VEC_PACK_TRUNC_EXPR.  When NARROW_SRC_P is true, there's still a conversion
    5133              :    after narrowing, so the results stay in VEC_OPRNDS, not in SLP_NODE.  */
    5134              : 
    5135              : static void
    5136        12052 : vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
    5137              :                                        int multi_step_cvt,
    5138              :                                        stmt_vec_info stmt_info,
    5139              :                                        vec<tree> &vec_dsts,
    5140              :                                        gimple_stmt_iterator *gsi,
    5141              :                                        slp_tree slp_node, code_helper code,
    5142              :                                        bool narrow_src_p)
    5143              : {
    5144        12052 :   unsigned int i;
    5145        12052 :   tree vop0, vop1, new_tmp, vec_dest;
    5146              : 
    5147        12052 :   vec_dest = vec_dsts.pop ();
    5148              :   /* Operands are consumed in pairs; NOTE(review): assumes an even count.  */
    5149        28513 :   for (i = 0; i < vec_oprnds->length (); i += 2)
    5150              :     {
    5151              :       /* Create demotion operation from the pair (I, I + 1).  */
    5152        16461 :       vop0 = (*vec_oprnds)[i];
    5153        16461 :       vop1 = (*vec_oprnds)[i + 1];
    5154        16461 :       gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
    5155        16461 :       new_tmp = make_ssa_name (vec_dest, new_stmt);
    5156        16461 :       gimple_set_lhs (new_stmt, new_tmp);
    5157        16461 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5158        16461 :       if (multi_step_cvt || narrow_src_p)
    5159              :         /* Store the resulting vector for next recursive call,
    5160              :            or return the resulting vector_tmp for NARROW FLOAT_EXPR.  */
    5161         6745 :         (*vec_oprnds)[i/2] = new_tmp;
    5162              :       else
    5163              :         {
    5164              :           /* This is the last step of the conversion sequence.  Store the
    5165              :              vectors in SLP_NODE.  */
    5166         9716 :           slp_node->push_vec_def (new_stmt);
    5167              :         }
    5168              :     }
    5169              : 
    5170              :   /* For multi-step demotion operations we first generate demotion operations
    5171              :      from the source type to the intermediate types, and then combine the
    5172              :      results (stored in VEC_OPRNDS) in demotion operation to the destination
    5173              :      type.  */
    5174        12052 :   if (multi_step_cvt)
    5175              :     {
    5176              :       /* At each level of recursion we have half of the operands we had at the
    5177              :          previous level; I is even here, so this keeps the I/2 stored results.  */
    5178         3000 :       vec_oprnds->truncate ((i+1)/2);
    5179         3000 :       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
    5180              :                                              multi_step_cvt - 1,
    5181              :                                              stmt_info, vec_dsts, gsi,
    5182         3000 :                                              slp_node, VEC_PACK_TRUNC_EXPR,
    5183              :                                              narrow_src_p);
    5184              :     }
    5185              : 
    5186        12052 :   vec_dsts.quick_push (vec_dest);
    5187        12052 : }
    5188              : 
    5189              : 
    5190              : /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
    5191              :    (and VEC_OPRNDS1 when OP_TYPE is binary_op): each input yields two stmts,
    5192              :    built with CH1 and CH2 via vect_gen_widened_results_half.  On return
    5193              :    *VEC_OPRNDS0 is released and replaced by the results (twice as many).  */
    5194              : 
    5195              : static void
    5196        11626 : vect_create_vectorized_promotion_stmts (vec_info *vinfo,
    5197              :                                         vec<tree> *vec_oprnds0,
    5198              :                                         vec<tree> *vec_oprnds1,
    5199              :                                         stmt_vec_info stmt_info, tree vec_dest,
    5200              :                                         gimple_stmt_iterator *gsi,
    5201              :                                         code_helper ch1,
    5202              :                                         code_helper ch2, int op_type)
    5203              : {
    5204        11626 :   int i;
    5205        11626 :   tree vop0, vop1, new_tmp1, new_tmp2;
    5206        11626 :   gimple *new_stmt1, *new_stmt2;
    5207        11626 :   vec<tree> vec_tmp = vNULL;
    5208              :   /* Each input vector produces two result vectors.  */
    5209        11626 :   vec_tmp.create (vec_oprnds0->length () * 2);
    5210        39206 :   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    5211              :     {
    5212        15954 :       if (op_type == binary_op)
    5213          555 :         vop1 = (*vec_oprnds1)[i];
    5214              :       else
    5215              :         vop1 = NULL_TREE;
    5216              : 
    5217              :       /* Generate the two halves of promotion operation.  */
    5218        15954 :       new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
    5219              :                                                  op_type, vec_dest, gsi,
    5220              :                                                  stmt_info);
    5221        15954 :       new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
    5222              :                                                  op_type, vec_dest, gsi,
    5223              :                                                  stmt_info);
    5224        15954 :       if (is_gimple_call (new_stmt1))
    5225              :         {
    5226            0 :           new_tmp1 = gimple_call_lhs (new_stmt1);
    5227            0 :           new_tmp2 = gimple_call_lhs (new_stmt2);
    5228              :         }
    5229              :       else
    5230              :         {
    5231        15954 :           new_tmp1 = gimple_assign_lhs (new_stmt1);
    5232        15954 :           new_tmp2 = gimple_assign_lhs (new_stmt2);
    5233              :         }
    5234              : 
    5235              :       /* Store the results (CH1 result, then CH2 result) for the next step.  */
    5236        15954 :       vec_tmp.quick_push (new_tmp1);
    5237        15954 :       vec_tmp.quick_push (new_tmp2);
    5238              :     }
    5239              : 
    5240        11626 :   vec_oprnds0->release ();
    5241        11626 :   *vec_oprnds0 = vec_tmp;
    5242        11626 : }
    5243              : 
    5244              : /* Create vectorized promotion stmts for widening stmts using only half the
    5245              :    potential vector size: widen inputs to VEC_DEST's type, then apply CODE1.  */
    5246              : static void
    5247           14 : vect_create_half_widening_stmts (vec_info *vinfo,
    5248              :                                         vec<tree> *vec_oprnds0,
    5249              :                                         vec<tree> *vec_oprnds1,
    5250              :                                         stmt_vec_info stmt_info, tree vec_dest,
    5251              :                                         gimple_stmt_iterator *gsi,
    5252              :                                         code_helper code1,
    5253              :                                         int op_type)
    5254              : {
    5255           14 :   int i;
    5256           14 :   tree vop0, vop1;
    5257           14 :   gimple *new_stmt1;
    5258           14 :   gimple *new_stmt2;
    5259           14 :   gimple *new_stmt3;
    5260           14 :   vec<tree> vec_tmp = vNULL;
    5261              :   /* One result vector per input vector.  */
    5262           14 :   vec_tmp.create (vec_oprnds0->length ());
    5263           28 :   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    5264              :     {
    5265           14 :       tree new_tmp1, new_tmp2, new_tmp3, out_type;
    5266              : 
    5267           14 :       gcc_assert (op_type == binary_op);
    5268           14 :       vop1 = (*vec_oprnds1)[i];
    5269              : 
    5270              :       /* Widen the first vector input to the type of VEC_DEST.  */
    5271           14 :       out_type = TREE_TYPE (vec_dest);
    5272           14 :       new_tmp1 = make_ssa_name (out_type);
    5273           14 :       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
    5274           14 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
    5275           14 :       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
    5276              :         {
    5277              :           /* Widen the second vector input.  */
    5278           14 :           new_tmp2 = make_ssa_name (out_type);
    5279           14 :           new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
    5280           14 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
    5281              :           /* Perform the operation.  With both vector inputs widened.  */
    5282           14 :           new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
    5283              :         }
    5284              :       else
    5285              :         {
    5286              :           /* Perform the operation.  With the single vector input widened.  */
    5287            0 :           new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
    5288              :         }
    5289              :       /* NOTE(review): gimple_assign_set_lhs presumes NEW_STMT3 is an assign.  */
    5290           14 :       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
    5291           14 :       gimple_assign_set_lhs (new_stmt3, new_tmp3);
    5292           14 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
    5293              : 
    5294              :       /* Collect the result; VEC_TMP replaces *VEC_OPRNDS0 on return.  */
    5295           14 :       vec_tmp.quick_push (new_tmp3);
    5296              :     }
    5297              : 
    5298           14 :   vec_oprnds0->release ();
    5299           14 :   *vec_oprnds0 = vec_tmp;
    5300           14 : }
    5301              : 
    5302              : 
    5303              : /* Check if STMT_INFO performs a conversion operation that can be vectorized.
    5304              :    If COST_VEC is passed, calculate costs but don't change anything,
    5305              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    5306              :    it, and insert it at GSI.
    5307              :    Return true if STMT_INFO is vectorizable in this way.  */
    5308              : 
    5309              : static bool
    5310      2679063 : vectorizable_conversion (vec_info *vinfo,
    5311              :                          stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    5312              :                          slp_tree slp_node,
    5313              :                          stmt_vector_for_cost *cost_vec)
    5314              : {
    5315      2679063 :   tree vec_dest, cvt_op = NULL_TREE;
    5316      2679063 :   tree scalar_dest;
    5317      2679063 :   tree op0, op1 = NULL_TREE;
    5318      2679063 :   tree_code tc1;
    5319      2679063 :   code_helper code, code1, code2;
    5320      2679063 :   code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
    5321      2679063 :   tree new_temp;
    5322      2679063 :   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
    5323      2679063 :   poly_uint64 nunits_in;
    5324      2679063 :   poly_uint64 nunits_out;
    5325      2679063 :   tree vectype_out, vectype_in;
    5326      2679063 :   int i;
    5327      2679063 :   tree lhs_type, rhs_type;
    5328              :   /* For conversions between floating point and integer, there're 2 NARROW
    5329              :      cases. NARROW_SRC is for FLOAT_EXPR, means
    5330              :      integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
    5331              :      This is safe when the range of the source integer can fit into the lower
    5332              :      precision. NARROW_DST is for FIX_TRUNC_EXPR, means
    5333              :      floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
    5334              :      For other conversions, when there's narrowing, NARROW_DST is used as
    5335              :      default.  */
    5336      2679063 :   enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
    5337      2679063 :   vec<tree> vec_oprnds0 = vNULL;
    5338      2679063 :   vec<tree> vec_oprnds1 = vNULL;
    5339      2679063 :   tree vop0;
    5340      2679063 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    5341      2679063 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5342      2679063 :   int multi_step_cvt = 0;
    5343      2679063 :   vec<tree> interm_types = vNULL;
    5344      2679063 :   tree intermediate_type, cvt_type = NULL_TREE;
    5345      2679063 :   int op_type;
    5346      2679063 :   unsigned short fltsz;
    5347              : 
    5348              :   /* Is STMT a vectorizable conversion?   */
    5349              : 
    5350      2679063 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    5351              :     return false;
    5352              : 
    5353      2679063 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    5354       234683 :       && cost_vec)
    5355              :     return false;
    5356              : 
    5357      2444380 :   gimple* stmt = stmt_info->stmt;
    5358      2444380 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
    5359              :     return false;
    5360              : 
    5361      2385600 :   if (gimple_get_lhs (stmt) == NULL_TREE
    5362      2385600 :       || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
    5363       815383 :     return false;
    5364              : 
    5365      1570217 :   if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
    5366              :     return false;
    5367              : 
    5368      1570217 :   if (is_gimple_assign (stmt))
    5369              :     {
    5370      1558236 :       code = gimple_assign_rhs_code (stmt);
    5371      1558236 :       op_type = TREE_CODE_LENGTH ((tree_code) code);
    5372              :     }
    5373        11981 :   else if (gimple_call_internal_p (stmt))
    5374              :     {
    5375         7862 :       code = gimple_call_internal_fn (stmt);
    5376         7862 :       op_type = gimple_call_num_args (stmt);
    5377              :     }
    5378              :   else
    5379              :     return false;
    5380              : 
    5381      1566098 :   bool widen_arith = (code == WIDEN_MULT_EXPR
    5382      1563777 :                  || code == WIDEN_LSHIFT_EXPR
    5383      3129875 :                  || widening_fn_p (code));
    5384              : 
    5385      1563777 :   if (!widen_arith
    5386      1563777 :       && !CONVERT_EXPR_CODE_P (code)
    5387      1401360 :       && code != FIX_TRUNC_EXPR
    5388      1399614 :       && code != FLOAT_EXPR)
    5389              :     return false;
    5390              : 
    5391              :   /* Check types of lhs and rhs.  */
    5392       184740 :   scalar_dest = gimple_get_lhs (stmt);
    5393       184740 :   lhs_type = TREE_TYPE (scalar_dest);
    5394       184740 :   vectype_out = SLP_TREE_VECTYPE (slp_node);
    5395              : 
    5396              :   /* Check the operands of the operation.  */
    5397       184740 :   slp_tree slp_op0, slp_op1 = NULL;
    5398       184740 :   if (!vect_is_simple_use (vinfo, slp_node,
    5399              :                            0, &op0, &slp_op0, &dt[0], &vectype_in))
    5400              :     {
    5401            0 :       if (dump_enabled_p ())
    5402            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5403              :                          "use not simple.\n");
    5404            0 :       return false;
    5405              :     }
    5406              : 
    5407       184740 :   rhs_type = TREE_TYPE (op0);
    5408       182994 :   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    5409       352912 :       && !((INTEGRAL_TYPE_P (lhs_type)
    5410       154789 :             && INTEGRAL_TYPE_P (rhs_type))
    5411              :            || (SCALAR_FLOAT_TYPE_P (lhs_type)
    5412         8825 :                && SCALAR_FLOAT_TYPE_P (rhs_type))))
    5413              :     return false;
    5414              : 
    5415       180182 :   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
    5416       160063 :       && INTEGRAL_TYPE_P (lhs_type)
    5417       313164 :       && !type_has_mode_precision_p (lhs_type))
    5418              :     {
    5419          447 :       if (dump_enabled_p ())
    5420            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5421              :                          "type conversion to bit-precision unsupported\n");
    5422          447 :       return false;
    5423              :     }
    5424              : 
    5425       179735 :   if (op_type == binary_op)
    5426              :     {
    5427         2321 :       gcc_assert (code == WIDEN_MULT_EXPR
    5428              :                   || code == WIDEN_LSHIFT_EXPR
    5429              :                   || widening_fn_p (code));
    5430              : 
    5431         2321 :       op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
    5432            0 :                                      gimple_call_arg (stmt, 0);
    5433         2321 :       tree vectype1_in;
    5434         2321 :       if (!vect_is_simple_use (vinfo, slp_node, 1,
    5435              :                                &op1, &slp_op1, &dt[1], &vectype1_in))
    5436              :         {
    5437            0 :           if (dump_enabled_p ())
    5438            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5439              :                              "use not simple.\n");
    5440            0 :           return false;
    5441              :         }
    5442              :       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
    5443              :          OP1.  */
    5444         2321 :       if (!vectype_in)
    5445          102 :         vectype_in = vectype1_in;
    5446              :     }
    5447              : 
    5448              :   /* If op0 is an external or constant def, infer the vector type
    5449              :      from the scalar type.  */
    5450       179735 :   if (!vectype_in)
    5451        19835 :     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
    5452       179735 :   if (!cost_vec)
    5453        22892 :     gcc_assert (vectype_in);
    5454       179735 :   if (!vectype_in)
    5455              :     {
    5456          258 :       if (dump_enabled_p ())
    5457            2 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5458              :                          "no vectype for scalar type %T\n", rhs_type);
    5459              : 
    5460          258 :       return false;
    5461              :     }
    5462              : 
    5463       358954 :   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
    5464       179477 :       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    5465              :     {
    5466          229 :       if (dump_enabled_p ())
    5467           36 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5468              :                          "can't convert between boolean and non "
    5469              :                          "boolean vectors %T\n", rhs_type);
    5470              : 
    5471          229 :       return false;
    5472              :     }
    5473              : 
    5474       179248 :   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
    5475       179248 :   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    5476       179248 :   if (known_eq (nunits_out, nunits_in))
    5477        85339 :     if (widen_arith)
    5478              :       modifier = WIDEN;
    5479              :     else
    5480       179248 :       modifier = NONE;
    5481        93909 :   else if (multiple_p (nunits_out, nunits_in))
    5482              :     modifier = NARROW_DST;
    5483              :   else
    5484              :     {
    5485        52022 :       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
    5486              :       modifier = WIDEN;
    5487              :     }
    5488              : 
    5489       179248 :   bool found_mode = false;
    5490       179248 :   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
    5491       179248 :   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
    5492       179248 :   opt_scalar_mode rhs_mode_iter;
    5493       179248 :   auto_vec<std::pair<tree, tree_code>, 2> converts;
    5494       179248 :   bool evenodd_ok = false;
    5495              : 
    5496              :   /* Supportable by target?  */
    5497       179248 :   switch (modifier)
    5498              :     {
    5499        85097 :     case NONE:
    5500        85097 :       if (code != FIX_TRUNC_EXPR
    5501        84045 :           && code != FLOAT_EXPR
    5502       160172 :           && !CONVERT_EXPR_CODE_P (code))
    5503              :         return false;
    5504        85097 :       gcc_assert (code.is_tree_code ());
    5505        85097 :       if (supportable_indirect_convert_operation (code,
    5506              :                                                   vectype_out, vectype_in,
    5507              :                                                   converts, op0, slp_op0))
    5508              :         {
    5509        18908 :           gcc_assert (converts.length () <= 2);
    5510        18908 :           if (converts.length () == 1)
    5511        18834 :             code1 = converts[0].second;
    5512              :           else
    5513              :             {
    5514           74 :               cvt_type = NULL_TREE;
    5515           74 :               multi_step_cvt = converts.length () - 1;
    5516           74 :               codecvt1 = converts[0].second;
    5517           74 :               code1 = converts[1].second;
    5518           74 :               interm_types.safe_push (converts[0].first);
    5519              :             }
    5520              :           break;
    5521              :         }
    5522              : 
    5523              :       /* FALLTHRU */
    5524        66189 :     unsupported:
    5525        73023 :       if (dump_enabled_p ())
    5526         6078 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5527              :                          "conversion not supported by target.\n");
    5528              :       return false;
    5529              : 
    5530        52264 :     case WIDEN:
    5531        52264 :       if (known_eq (nunits_in, nunits_out))
    5532              :         {
    5533          484 :           if (!(code.is_tree_code ()
    5534          242 :                 && supportable_half_widening_operation ((tree_code) code,
    5535              :                                                         vectype_out, vectype_in,
    5536              :                                                         &tc1)))
    5537           69 :             goto unsupported;
    5538          173 :           code1 = tc1;
    5539          173 :           gcc_assert (!(multi_step_cvt && op_type == binary_op));
    5540              :           break;
    5541              :         }
    5542              :       /* Elements in a vector can only be reordered if used in a reduction
    5543              :          operation only.  */
    5544        52022 :       if (code == WIDEN_MULT_EXPR
    5545         2079 :           && loop_vinfo
    5546         2032 :           && !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)
    5547              :           /* For a SLP reduction we cannot swizzle lanes, detecting a
    5548              :              reduction chain isn't possible here.  */
    5549        54032 :           && SLP_TREE_LANES (slp_node) == 1)
    5550              :         {
    5551              :           /* ???  There is no way to look for SLP uses, so work on
    5552              :              the stmt and what the stmt-based cycle detection gives us.  */
    5553         1908 :           tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
    5554         1908 :           stmt_vec_info use_stmt_info
    5555         1908 :             = lhs ? loop_vinfo->lookup_single_use (lhs) : NULL;
    5556         1908 :           if (use_stmt_info
    5557         1759 :               && STMT_VINFO_REDUC_DEF (use_stmt_info))
    5558        52022 :             evenodd_ok = true;
    5559              :         }
    5560        52022 :       if (supportable_widening_operation (code, vectype_out, vectype_in,
    5561              :                                           evenodd_ok, &code1,
    5562              :                                           &code2, &multi_step_cvt,
    5563              :                                           &interm_types))
    5564              :         {
    5565              :           /* Binary widening operation can only be supported directly by the
    5566              :              architecture.  */
    5567        50102 :           gcc_assert (!(multi_step_cvt && op_type == binary_op));
    5568              :           break;
    5569              :         }
    5570              : 
    5571         1920 :       if (code != FLOAT_EXPR
    5572         2292 :           || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
    5573         1734 :         goto unsupported;
    5574              : 
    5575          186 :       fltsz = GET_MODE_SIZE (lhs_mode);
    5576          273 :       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
    5577              :         {
    5578          273 :           rhs_mode = rhs_mode_iter.require ();
    5579          546 :           if (GET_MODE_SIZE (rhs_mode) > fltsz)
    5580              :             break;
    5581              : 
    5582          273 :           cvt_type
    5583          273 :             = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
    5584          273 :           cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
    5585          273 :           if (cvt_type == NULL_TREE)
    5586            0 :             goto unsupported;
    5587              : 
    5588          546 :           if (GET_MODE_SIZE (rhs_mode) == fltsz)
    5589              :             {
    5590           81 :               tc1 = ERROR_MARK;
    5591           81 :               gcc_assert (code.is_tree_code ());
    5592           81 :               if (!supportable_convert_operation ((tree_code) code, vectype_out,
    5593              :                                                   cvt_type, &tc1))
    5594           22 :                 goto unsupported;
    5595           59 :               codecvt1 = tc1;
    5596              :             }
    5597          192 :           else if (!supportable_widening_operation (code, vectype_out,
    5598              :                                                     cvt_type, evenodd_ok,
    5599              :                                                     &codecvt1,
    5600              :                                                     &codecvt2, &multi_step_cvt,
    5601              :                                                     &interm_types))
    5602           87 :             continue;
    5603              :           else
    5604          105 :             gcc_assert (multi_step_cvt == 0);
    5605              : 
    5606          164 :           if (supportable_widening_operation (NOP_EXPR, cvt_type,
    5607              :                                               vectype_in, evenodd_ok, &code1,
    5608              :                                               &code2, &multi_step_cvt,
    5609              :                                               &interm_types))
    5610              :             {
    5611              :               found_mode = true;
    5612              :               break;
    5613              :             }
    5614              :         }
    5615              : 
    5616          164 :       if (!found_mode)
    5617            0 :         goto unsupported;
    5618              : 
    5619          328 :       if (GET_MODE_SIZE (rhs_mode) == fltsz)
    5620           59 :         codecvt2 = ERROR_MARK;
    5621              :       else
    5622              :         {
    5623          105 :           multi_step_cvt++;
    5624          105 :           interm_types.safe_push (cvt_type);
    5625          105 :           cvt_type = NULL_TREE;
    5626              :         }
    5627              :       break;
    5628              : 
    5629        41887 :     case NARROW_DST:
    5630        41887 :       gcc_assert (op_type == unary_op);
    5631        41887 :       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
    5632              :                                            &code1, &multi_step_cvt,
    5633              :                                            &interm_types))
    5634              :         break;
    5635              : 
    5636        15411 :       if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
    5637          984 :         goto unsupported;
    5638              : 
    5639         4153 :       if (code == FIX_TRUNC_EXPR)
    5640              :         {
    5641          107 :           cvt_type
    5642          107 :             = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
    5643          107 :           cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
    5644          107 :           if (cvt_type == NULL_TREE)
    5645            0 :             goto unsupported;
    5646          107 :           if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
    5647              :                                               &tc1))
    5648          105 :             codecvt1 = tc1;
    5649              :           else
    5650            2 :             goto unsupported;
    5651          105 :           if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
    5652              :                                                &code1, &multi_step_cvt,
    5653              :                                                &interm_types))
    5654              :             break;
    5655              :         }
    5656              :       /* If op0 can be represented with low precision integer,
    5657              :          truncate it to cvt_type and then do FLOAT_EXPR.  */
    5658         4046 :       else if (code == FLOAT_EXPR)
    5659              :         {
    5660          137 :           if (cost_vec)
    5661              :             {
    5662          132 :               wide_int op_min_value, op_max_value;
    5663          132 :               tree def;
    5664              : 
    5665              :               /* ???  Merge ranges in case of more than one lane.  */
    5666          132 :               if (SLP_TREE_LANES (slp_op0) != 1
    5667          130 :                   || !(def = vect_get_slp_scalar_def (slp_op0, 0))
    5668          262 :                   || !vect_get_range_info (def, &op_min_value, &op_max_value))
    5669          106 :                 goto unsupported;
    5670              : 
    5671           26 :               if ((wi::min_precision (op_max_value, SIGNED)
    5672           26 :                    > GET_MODE_BITSIZE (lhs_mode))
    5673           26 :                   || (wi::min_precision (op_min_value, SIGNED)
    5674           24 :                       > GET_MODE_BITSIZE (lhs_mode)))
    5675            2 :                 goto unsupported;
    5676          132 :             }
    5677              : 
    5678           29 :           cvt_type
    5679           29 :             = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
    5680           29 :           cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
    5681           29 :           if (cvt_type == NULL_TREE)
    5682            0 :             goto unsupported;
    5683           29 :           if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
    5684              :                                                 &code1, &multi_step_cvt,
    5685              :                                                 &interm_types))
    5686            2 :             goto unsupported;
    5687           27 :           if (supportable_convert_operation ((tree_code) code, vectype_out,
    5688              :                                              cvt_type, &tc1))
    5689              :             {
    5690           27 :               codecvt1 = tc1;
    5691           27 :               modifier = NARROW_SRC;
    5692           27 :               break;
    5693              :             }
    5694              :         }
    5695              : 
    5696         3913 :       goto unsupported;
    5697              : 
    5698              :     default:
    5699              :       gcc_unreachable ();
    5700              :     }
    5701              : 
    5702       106225 :   if (modifier == WIDEN
    5703       106225 :       && loop_vinfo
    5704        49285 :       && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
    5705       127264 :       && (code1 == VEC_WIDEN_MULT_EVEN_EXPR
    5706        21017 :           || widening_evenodd_fn_p (code1)))
    5707              :     {
    5708           22 :       if (dump_enabled_p ())
    5709            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5710              :                          "can't use a fully-masked loop because"
    5711              :                          " widening operation on even/odd elements"
    5712              :                          " mixes up lanes.\n");
    5713           22 :       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    5714              :     }
    5715              : 
    5716       106225 :   if (cost_vec)         /* transformation not required.  */
    5717              :     {
    5718        83333 :       if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
    5719        83333 :           || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))
    5720              :         {
    5721            0 :           if (dump_enabled_p ())
    5722            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5723              :                              "incompatible vector types for invariants\n");
    5724            0 :           return false;
    5725              :         }
    5726        83333 :       DUMP_VECT_SCOPE ("vectorizable_conversion");
    5727        83333 :       unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
    5728        83333 :       if (modifier == NONE)
    5729              :         {
    5730        14910 :           SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
    5731        14910 :           vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
    5732              :                                   slp_node, cost_vec);
    5733              :         }
    5734        68423 :       else if (modifier == NARROW_SRC || modifier == NARROW_DST)
    5735              :         {
    5736        27826 :           SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
    5737              :           /* The final packing step produces one vector result per copy.  */
    5738        27826 :           vect_model_promotion_demotion_cost (slp_node, nvectors,
    5739              :                                               multi_step_cvt, cost_vec,
    5740              :                                               widen_arith);
    5741              :         }
    5742              :       else
    5743              :         {
    5744        40597 :           SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
    5745              :           /* The initial unpacking step produces two vector results
    5746              :              per copy.  MULTI_STEP_CVT is 0 for a single conversion,
    5747              :              so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
    5748        40597 :           vect_model_promotion_demotion_cost (slp_node,
    5749              :                                               nvectors >> multi_step_cvt,
    5750              :                                               multi_step_cvt, cost_vec,
    5751              :                                               widen_arith);
    5752              :         }
    5753        83333 :       interm_types.release ();
    5754        83333 :       return true;
    5755        83333 :     }
    5756              : 
    5757              :   /* Transform.  */
    5758        22892 :   if (dump_enabled_p ())
    5759         4287 :     dump_printf_loc (MSG_NOTE, vect_location, "transform conversion.\n");
    5760              : 
    5761        22892 :   if (op_type == binary_op)
    5762              :     {
    5763          508 :       if (CONSTANT_CLASS_P (op0))
    5764            0 :         op0 = fold_convert (TREE_TYPE (op1), op0);
    5765          508 :       else if (CONSTANT_CLASS_P (op1))
    5766          234 :         op1 = fold_convert (TREE_TYPE (op0), op1);
    5767              :     }
    5768              : 
    5769              :   /* In case of multi-step conversion, we first generate conversion operations
    5770              :      to the intermediate types, and then from those types to the final one.
    5771              :      We create vector destinations for the intermediate type (TYPES) received
    5772              :      from supportable_*_operation, and store them in the correct order
    5773              :      for future use in vect_create_vectorized_*_stmts ().  */
    5774        22892 :   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
    5775        22892 :   bool widen_or_narrow_float_p
    5776        22892 :     = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
    5777        22892 :   vec_dest = vect_create_destination_var (scalar_dest,
    5778              :                                           widen_or_narrow_float_p
    5779              :                                           ? cvt_type : vectype_out);
    5780        22892 :   vec_dsts.quick_push (vec_dest);
    5781              : 
    5782        22892 :   if (multi_step_cvt)
    5783              :     {
    5784         9148 :       for (i = interm_types.length () - 1;
    5785         9148 :            interm_types.iterate (i, &intermediate_type); i--)
    5786              :         {
    5787         4819 :           vec_dest = vect_create_destination_var (scalar_dest,
    5788              :                                                   intermediate_type);
    5789         4819 :           vec_dsts.quick_push (vec_dest);
    5790              :         }
    5791              :     }
    5792              : 
    5793        22892 :   if (cvt_type)
    5794           73 :     vec_dest = vect_create_destination_var (scalar_dest,
    5795              :                                             widen_or_narrow_float_p
    5796              :                                             ? vectype_out : cvt_type);
    5797              : 
    5798        22892 :   switch (modifier)
    5799              :     {
    5800         3998 :     case NONE:
    5801         3998 :       vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
    5802              :       /* vec_dest is intermediate type operand when multi_step_cvt.  */
    5803         3998 :       if (multi_step_cvt)
    5804              :         {
    5805           21 :           cvt_op = vec_dest;
    5806           21 :           vec_dest = vec_dsts[0];
    5807              :         }
    5808              : 
    5809         8372 :       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    5810              :         {
    5811              :           /* Arguments are ready, create the new vector stmt.  */
    5812         4374 :           gimple* new_stmt;
    5813         4374 :           if (multi_step_cvt)
    5814              :             {
    5815           21 :               gcc_assert (multi_step_cvt == 1);
    5816           21 :               new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
    5817           21 :               new_temp = make_ssa_name (cvt_op, new_stmt);
    5818           21 :               gimple_assign_set_lhs (new_stmt, new_temp);
    5819           21 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5820           21 :               vop0 = new_temp;
    5821              :             }
    5822         4374 :           new_stmt = vect_gimple_build (vec_dest, code1, vop0);
    5823         4374 :           new_temp = make_ssa_name (vec_dest, new_stmt);
    5824         4374 :           gimple_set_lhs (new_stmt, new_temp);
    5825         4374 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5826              : 
    5827         4374 :           slp_node->push_vec_def (new_stmt);
    5828              :         }
    5829              :       break;
    5830              : 
    5831         9842 :     case WIDEN:
    5832              :       /* In case the vectorization factor (VF) is bigger than the number
    5833              :          of elements that we can fit in a vectype (nunits), we have to
    5834              :          generate more than one vector stmt - i.e - we need to "unroll"
    5835              :          the vector stmt by a factor VF/nunits.  */
    5836         9842 :       vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0,
    5837         9842 :                          code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
    5838              :                          &vec_oprnds1);
    5839         9842 :       if (code == WIDEN_LSHIFT_EXPR)
    5840              :         {
    5841            0 :           int oprnds_size = vec_oprnds0.length ();
    5842            0 :           vec_oprnds1.create (oprnds_size);
    5843            0 :           for (i = 0; i < oprnds_size; ++i)
    5844            0 :             vec_oprnds1.quick_push (op1);
    5845              :         }
    5846              :       /* Arguments are ready.  Create the new vector stmts.  */
    5847        21482 :       for (i = multi_step_cvt; i >= 0; i--)
    5848              :         {
    5849        11640 :           tree this_dest = vec_dsts[i];
    5850        11640 :           code_helper c1 = code1, c2 = code2;
    5851        11640 :           if (i == 0 && codecvt2 != ERROR_MARK)
    5852              :             {
    5853           48 :               c1 = codecvt1;
    5854           48 :               c2 = codecvt2;
    5855              :             }
    5856        11640 :           if (known_eq (nunits_out, nunits_in))
    5857           14 :             vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
    5858              :                                              stmt_info, this_dest, gsi, c1,
    5859              :                                              op_type);
    5860              :           else
    5861        11626 :             vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
    5862              :                                                     &vec_oprnds1, stmt_info,
    5863              :                                                     this_dest, gsi,
    5864              :                                                     c1, c2, op_type);
    5865              :         }
    5866              : 
    5867        37556 :       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    5868              :         {
    5869        27714 :           gimple *new_stmt;
    5870        27714 :           if (cvt_type)
    5871              :             {
    5872          120 :               new_temp = make_ssa_name (vec_dest);
    5873          120 :               new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
    5874          120 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5875              :             }
    5876              :           else
    5877        27594 :             new_stmt = SSA_NAME_DEF_STMT (vop0);
    5878              : 
    5879        27714 :           slp_node->push_vec_def (new_stmt);
    5880              :         }
    5881              :       break;
    5882              : 
    5883         9052 :     case NARROW_SRC:
    5884         9052 :     case NARROW_DST:
    5885              :       /* In case the vectorization factor (VF) is bigger than the number
    5886              :          of elements that we can fit in a vectype (nunits), we have to
    5887              :          generate more than one vector stmt - i.e - we need to "unroll"
    5888              :          the vector stmt by a factor VF/nunits.  */
    5889         9052 :       vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
    5890              :       /* Arguments are ready.  Create the new vector stmts.  */
    5891         9052 :       if (cvt_type && modifier == NARROW_DST)
    5892          153 :         FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    5893              :           {
    5894          124 :             new_temp = make_ssa_name (vec_dest);
    5895          124 :             gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
    5896          124 :             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5897          124 :             vec_oprnds0[i] = new_temp;
    5898              :           }
    5899              : 
    5900         9052 :       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
    5901              :                                              multi_step_cvt,
    5902              :                                              stmt_info, vec_dsts, gsi,
    5903              :                                              slp_node, code1,
    5904              :                                              modifier == NARROW_SRC);
    5905              :       /* After demoting op0 to cvt_type, convert it to dest.  */
    5906         9052 :       if (cvt_type && code == FLOAT_EXPR)
    5907              :         {
    5908           10 :           for (unsigned int i = 0; i != vec_oprnds0.length() / 2;  i++)
    5909              :             {
    5910              :               /* Arguments are ready, create the new vector stmt.  */
    5911            5 :               gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
    5912            5 :               gimple *new_stmt
    5913            5 :                 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
    5914            5 :               new_temp = make_ssa_name (vec_dest, new_stmt);
    5915            5 :               gimple_set_lhs (new_stmt, new_temp);
    5916            5 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    5917              : 
    5918              :               /* This is the last step of the conversion sequence. Store the
    5919              :                  vectors in SLP_NODE or in vector info of the scalar statement
    5920              :                  (or in STMT_VINFO_RELATED_STMT chain).  */
    5921            5 :               slp_node->push_vec_def (new_stmt);
    5922              :             }
    5923              :         }
    5924              :       break;
    5925              :     }
    5926              : 
    5927        22892 :   vec_oprnds0.release ();
    5928        22892 :   vec_oprnds1.release ();
    5929        22892 :   interm_types.release ();
    5930              : 
    5931        22892 :   return true;
    5932       179248 : }
    5933              : 
    5934              : /* Return true if we can assume from the scalar form of STMT_INFO that
    5935              :    neither the scalar nor the vector forms will generate code.  Statements
    5936              :    that are not assignments or that involve a data reference yield false.  */
    5937              : 
    5938              : bool
    5939      3155396 : vect_nop_conversion_p (stmt_vec_info stmt_info)
    5940              : {
    5941      3155396 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    5942      2881896 :   if (!stmt || STMT_VINFO_DATA_REF (stmt_info))
    5943              :     return false;  /* Not a gassign, or it has a data reference.  */
    5944              : 
    5945       925259 :   tree lhs = gimple_assign_lhs (stmt);
    5946       925259 :   tree_code code = gimple_assign_rhs_code (stmt);
    5947       925259 :   tree rhs = gimple_assign_rhs1 (stmt);
    5948              : 
    5949       925259 :   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    5950              :     return true;  /* SSA name copy or VIEW_CONVERT_EXPR: always a nop here.  */
    5951              : 
    5952       922357 :   if (CONVERT_EXPR_CODE_P (code))  /* Decided by the LHS/RHS scalar types.  */
    5953       229058 :     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
    5954              : 
    5955              :   return false;  /* Any other rhs code: do not assume a nop.  */
    5956              : }
    5957              : 
    5958              : /* Function vectorizable_assignment.
    5959              : 
    5960              :    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
    5961              :    If COST_VEC is passed, calculate costs but don't change anything,
    5962              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    5963              :    it, and insert it at GSI.
    5964              :    Return true if STMT_INFO is vectorizable in this way.  */
    5965              : 
    5966              : static bool
    5967      2052043 : vectorizable_assignment (vec_info *vinfo,
    5968              :                          stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    5969              :                          slp_tree slp_node,
    5970              :                          stmt_vector_for_cost *cost_vec)
    5971              : {
    5972      2052043 :   tree vec_dest;
    5973      2052043 :   tree scalar_dest;
    5974      2052043 :   tree op;
    5975      2052043 :   tree new_temp;
    5976      2052043 :   enum vect_def_type dt[1] = {vect_unknown_def_type};
    5977      2052043 :   int i;
    5978      2052043 :   vec<tree> vec_oprnds = vNULL;
    5979      2052043 :   tree vop;
    5980      2052043 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    5981      2052043 :   enum tree_code code;
    5982      2052043 :   tree vectype_in;
    5983              : 
    5984      2052043 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    5985              :     return false;
    5986              : 
    5987      2052043 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    5988       234683 :       && cost_vec)
    5989              :     return false;
    5990              : 
    5991              :   /* Is vectorizable assignment?  */
    5992      3712090 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    5993      1745244 :   if (!stmt)
    5994              :     return false;
    5995              : 
    5996      1745244 :   scalar_dest = gimple_assign_lhs (stmt);
    5997      1745244 :   if (TREE_CODE (scalar_dest) != SSA_NAME)
    5998              :     return false;
    5999              : 
    6000       931216 :   if (STMT_VINFO_DATA_REF (stmt_info))
    6001              :     return false;
    6002              : 
    6003       392112 :   code = gimple_assign_rhs_code (stmt);
    6004       392112 :   if (!(gimple_assign_single_p (stmt)
    6005       390583 :         || code == PAREN_EXPR
    6006       389407 :         || CONVERT_EXPR_CODE_P (code)))
    6007              :     return false;
    6008              : 
    6009        95799 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
    6010        95799 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    6011              : 
    6012        95799 :   slp_tree slp_op;
    6013        95799 :   if (!vect_is_simple_use (vinfo, slp_node, 0, &op, &slp_op,
    6014              :                            &dt[0], &vectype_in))
    6015              :     {
    6016            0 :       if (dump_enabled_p ())
    6017            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6018              :                          "use not simple.\n");
    6019            0 :       return false;
    6020              :     }
    6021        95799 :   if (!vectype_in)
    6022        17667 :     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
    6023              : 
    6024              :   /* We can handle VIEW_CONVERT conversions that do not change the number
    6025              :      of elements or the vector size or other conversions when the component
    6026              :      types are nop-convertible.  */
    6027        95799 :   if (!vectype_in
    6028        95521 :       || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
    6029        88404 :       || (code == VIEW_CONVERT_EXPR
    6030         2802 :           && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
    6031         2802 :                        GET_MODE_SIZE (TYPE_MODE (vectype_in))))
    6032       184203 :       || (CONVERT_EXPR_CODE_P (code)
    6033        85731 :           && !tree_nop_conversion_p (TREE_TYPE (vectype),
    6034        85731 :                                      TREE_TYPE (vectype_in))))
    6035        10334 :     return false;
    6036              : 
    6037       256299 :   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    6038              :     {
    6039            2 :       if (dump_enabled_p ())
    6040            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6041              :                          "can't convert between boolean and non "
    6042            0 :                          "boolean vectors %T\n", TREE_TYPE (op));
    6043              : 
    6044            2 :       return false;
    6045              :     }
    6046              : 
    6047              :   /* We do not handle bit-precision changes.  */
    6048        85463 :   if ((CONVERT_EXPR_CODE_P (code)
    6049         2673 :        || code == VIEW_CONVERT_EXPR)
    6050        84191 :       && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
    6051        82845 :            && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    6052        83876 :           || (INTEGRAL_TYPE_P (TREE_TYPE (op))
    6053        79069 :               && !type_has_mode_precision_p (TREE_TYPE (op))))
    6054              :       /* But a conversion that does not change the bit-pattern is ok.  */
    6055        86193 :       && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
    6056          730 :            && INTEGRAL_TYPE_P (TREE_TYPE (op))
    6057          730 :            && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
    6058          730 :                > TYPE_PRECISION (TREE_TYPE (op)))
    6059          415 :              && TYPE_UNSIGNED (TREE_TYPE (op)))
    6060          331 :                || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
    6061          331 :                    == TYPE_PRECISION (TREE_TYPE (op))))))
    6062              :     {
    6063          266 :       if (dump_enabled_p ())
    6064            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6065              :                          "type conversion to/from bit-precision "
    6066              :                          "unsupported.\n");
    6067          266 :       return false;
    6068              :     }
    6069              : 
    6070        85197 :   if (cost_vec) /* transformation not required.  */
    6071              :     {
    6072        69173 :       if (!vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
    6073              :         {
    6074            0 :           if (dump_enabled_p ())
    6075            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6076              :                              "incompatible vector types for invariants\n");
    6077            0 :           return false;
    6078              :         }
    6079        69173 :       SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
    6080        69173 :       DUMP_VECT_SCOPE ("vectorizable_assignment");
    6081        69173 :       if (!vect_nop_conversion_p (stmt_info))
    6082          962 :         vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
    6083        69173 :       return true;
    6084              :     }
    6085              : 
    6086              :   /* Transform.  */
    6087        16024 :   if (dump_enabled_p ())
    6088         3625 :     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
    6089              : 
    6090              :   /* Handle def.  */
    6091        16024 :   vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6092              : 
    6093              :   /* Handle use.  */
    6094        16024 :   vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
    6095              : 
    6096              :   /* Arguments are ready. create the new vector stmt.  */
    6097        36189 :   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    6098              :     {
    6099        20165 :       if (CONVERT_EXPR_CODE_P (code)
    6100          679 :           || code == VIEW_CONVERT_EXPR)
    6101        19620 :         vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
    6102        20165 :       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
    6103        20165 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6104        20165 :       gimple_assign_set_lhs (new_stmt, new_temp);
    6105        20165 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    6106        20165 :       slp_node->push_vec_def (new_stmt);
    6107              :     }
    6108              : 
    6109        16024 :   vec_oprnds.release ();
    6110        16024 :   return true;
    6111              : }
    6112              : 
    6113              : 
    6114              : /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
    6115              :    either as shift by a scalar or by a vector.
                           VINFO is passed to get_vectype_for_scalar_type to find the vector
                           type for SCALAR_TYPE.  FALSE is returned when no such vector type
                           exists, or when neither the optab_scalar nor the optab_vector
                           optab for CODE can be implemented in that vector type's mode.  */
    6116              : 
    6117              : bool
    6118       298191 : vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
    6119              : {
    6120       298191 :   optab optab;
    6121       298191 :   tree vectype;
    6122              : 
    6123       298191 :   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6124       298191 :   if (!vectype)
    6125              :     return false;
    6126              : 
                          /* First try the vector shifted by scalar form.  */
    6127       298191 :   optab = optab_for_tree_code (code, vectype, optab_scalar);
    6128       298191 :   if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
    6129              :     return true;
    6130              : 
                          /* Otherwise try the vector shifted by vector form.  */
    6131       262058 :   optab = optab_for_tree_code (code, vectype, optab_vector);
    6132       262058 :   if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
    6133              :     return true;
    6134              : 
    6135              :   return false;
    6136              : }
    6137              : 
    6138              : 
    6139              : /* Function vectorizable_shift.
    6140              : 
    6141              :    Check if STMT_INFO performs a shift operation that can be vectorized.
                           Shift operations here are assignments whose rhs code is
                           LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR.  The
                           output vector type and both operands are looked up via SLP_NODE.
    6142              :    If COST_VEC is passed, calculate costs but don't change anything,
    6143              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    6144              :    it, and insert it at GSI.
                           Note that the COST_VEC (analysis) path is not entirely free of
                           side effects: it calls vect_maybe_update_slp_op_vectype on the
                           operand SLP nodes, sets SLP_TREE_TYPE of SLP_NODE to
                           shift_vec_info_type and may convert the scalar operands of a
                           constant shift amount node in place.
    6145              :    Return true if STMT_INFO is vectorizable in this way.  */
    6146              : 
    6147              : static bool
    6148       726048 : vectorizable_shift (vec_info *vinfo,
    6149              :                     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    6150              :                     slp_tree slp_node,
    6151              :                     stmt_vector_for_cost *cost_vec)
    6152              : {
    6153       726048 :   tree vec_dest;
    6154       726048 :   tree scalar_dest;
    6155       726048 :   tree op0, op1 = NULL;
    6156       726048 :   tree vec_oprnd1 = NULL_TREE;
    6157       726048 :   tree vectype;
    6158       726048 :   enum tree_code code;
    6159       726048 :   machine_mode vec_mode;
    6160       726048 :   tree new_temp;
    6161       726048 :   optab optab;
    6162       726048 :   int icode;
    6163       726048 :   machine_mode optab_op2_mode;
    6164       726048 :   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
    6165       726048 :   poly_uint64 nunits_in;
    6166       726048 :   poly_uint64 nunits_out;
    6167       726048 :   tree vectype_out;
    6168       726048 :   tree op1_vectype;
    6169       726048 :   int i;
    6170       726048 :   vec<tree> vec_oprnds0 = vNULL;
    6171       726048 :   vec<tree> vec_oprnds1 = vNULL;
    6172       726048 :   tree vop0, vop1;
    6173       726048 :   unsigned int k;
                          /* Starts out true and is cleared below whenever the shift amount
                             has to be supplied as a vector.  */
    6174       726048 :   bool scalar_shift_arg = true;
    6175       726048 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    6176       726048 :   bool incompatible_op1_vectype_p = false;
    6177              : 
                          /* Stmts that are not relevant are only considered when VINFO is a
                             bb_vec_info.  */
    6178       726048 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    6179              :     return false;
    6180              : 
                          /* When only analyzing (COST_VEC set), accept just internal defs
                             and nested cycles.  */
    6181       726048 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    6182       234683 :       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
    6183       233193 :       && cost_vec)
    6184              :     return false;
    6185              : 
    6186              :   /* Is STMT an assignment to an SSA name with a shift or rotate code?  */
    6187      1089472 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    6188       421739 :   if (!stmt)
    6189              :     return false;
    6190              : 
    6191       421739 :   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    6192              :     return false;
    6193              : 
    6194       421181 :   code = gimple_assign_rhs_code (stmt);
    6195              : 
    6196       421181 :   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
    6197              :       || code == RROTATE_EXPR))
    6198              :     return false;
    6199              : 
    6200        64437 :   scalar_dest = gimple_assign_lhs (stmt);
    6201        64437 :   vectype_out = SLP_TREE_VECTYPE (slp_node);
    6202        64437 :   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    6203              :     {
    6204            0 :       if (dump_enabled_p ())
    6205            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6206              :                          "bit-precision shifts not supported.\n");
    6207            0 :       return false;
    6208              :     }
    6209              : 
    6210        64437 :   slp_tree slp_op0;
    6211        64437 :   if (!vect_is_simple_use (vinfo, slp_node,
    6212              :                            0, &op0, &slp_op0, &dt[0], &vectype))
    6213              :     {
    6214            0 :       if (dump_enabled_p ())
    6215            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6216              :                          "use not simple.\n");
    6217            0 :       return false;
    6218              :     }
    6219              :   /* If op0 is an external or constant def, infer the vector type
    6220              :      from the scalar type.  */
    6221        64437 :   if (!vectype)
    6222        15480 :     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
                          /* When transforming (no COST_VEC) a vector type must have been
                             found; during analysis a missing one just fails below.  */
    6223        64437 :   if (!cost_vec)
    6224         8560 :     gcc_assert (vectype);
    6225        64437 :   if (!vectype)
    6226              :     {
    6227            0 :       if (dump_enabled_p ())
    6228            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6229              :                          "no vectype for scalar type\n");
    6230            0 :       return false;
    6231              :     }
    6232              : 
                          /* The result and the shifted operand must have the same number of
                             vector elements.  */
    6233        64437 :   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    6234        64437 :   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
    6235        64437 :   if (maybe_ne (nunits_out, nunits_in))
    6236              :     return false;
    6237              : 
    6238        64437 :   stmt_vec_info op1_def_stmt_info;
    6239        64437 :   slp_tree slp_op1;
    6240        64437 :   if (!vect_is_simple_use (vinfo, slp_node, 1, &op1, &slp_op1,
    6241              :                            &dt[1], &op1_vectype, &op1_def_stmt_info))
    6242              :     {
    6243            0 :       if (dump_enabled_p ())
    6244            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6245              :                          "use not simple.\n");
    6246            0 :       return false;
    6247              :     }
    6248              : 
    6249              :   /* Determine whether the shift amount is a vector, or scalar.  If the
    6250              :      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
    6251              : 
                          /* An internal, induction or nested-cycle shift amount with a
                             single SLP lane always uses the vector/vector form.  Any other
                             def type than constant, external or internal is rejected.  */
    6252        64437 :   if ((dt[1] == vect_internal_def
    6253        64437 :        || dt[1] == vect_induction_def
    6254        48112 :        || dt[1] == vect_nested_cycle)
    6255        16343 :       && SLP_TREE_LANES (slp_node) == 1)
    6256              :     scalar_shift_arg = false;
    6257        48149 :   else if (dt[1] == vect_constant_def
    6258              :            || dt[1] == vect_external_def
    6259        48149 :            || dt[1] == vect_internal_def)
    6260              :     {
    6261              :       /* In SLP, need to check whether the shift count is the same,
    6262              :          in loops if it is a constant or invariant, it is always
    6263              :          a scalar shift.  */
    6264        48143 :       vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
    6265        48143 :       stmt_vec_info slpstmt_info;
    6266              : 
    6267       126221 :       FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
    6268        78078 :         if (slpstmt_info)
    6269              :           {
    6270        78078 :             gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
    6271       156156 :             if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
    6272        78078 :               scalar_shift_arg = false;
    6273              :           }
    6274              : 
    6275              :       /* For internal SLP defs we have to make sure we see scalar stmts
    6276              :          for all vector elements.
    6277              :          ???  For different vectors we could resort to a different
    6278              :          scalar shift operand but code-generation below simply always
    6279              :          takes the first.  */
    6280        48143 :       if (dt[1] == vect_internal_def
    6281        48192 :           && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node),
    6282           49 :                        stmts.length ()))
    6283              :         scalar_shift_arg = false;
    6284              : 
    6285              :       /* If the shift amount is computed by a pattern stmt we cannot
    6286              :          use the scalar amount directly thus give up and use a vector
    6287              :          shift.  */
    6288        48143 :       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
    6289              :         scalar_shift_arg = false;
    6290              :     }
    6291              :   else
    6292              :     {
    6293            6 :       if (dump_enabled_p ())
    6294            6 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6295              :                          "operand mode requires invariant argument.\n");
    6296            6 :       return false;
    6297              :     }
    6298              : 
                          /* WAS_SCALAR_SHIFT_ARG keeps the decision made above;
                             SCALAR_SHIFT_ARG itself is cleared below when only the
                             vector/vector optab can be implemented.  The transform code
                             uses the saved value to tell the two situations apart.  */
    6299              :   /* Vector shifted by vector.  */
    6300        64469 :   bool was_scalar_shift_arg = scalar_shift_arg;
    6301        48134 :   if (!scalar_shift_arg)
    6302              :     {
    6303        16335 :       optab = optab_for_tree_code (code, vectype, optab_vector);
    6304        16335 :       if (dump_enabled_p ())
    6305         1205 :         dump_printf_loc (MSG_NOTE, vect_location,
    6306              :                          "vector/vector shift/rotate found.\n");
    6307              : 
    6308        16335 :       if (!op1_vectype)
    6309           15 :         op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
    6310              :                                                    slp_op1);
                              /* The shift amount vector type is unusable when it is missing
                                 or differs from VECTYPE in number of elements or in mode.
                                 That is only tolerated for a constant operand node whose
                                 refcnt is 1; see the in-place adjustment of the constant
                                 shift amount in the COST_VEC path.  */
    6311        16335 :       incompatible_op1_vectype_p
    6312        32670 :         = (op1_vectype == NULL_TREE
    6313        16335 :            || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
    6314        16335 :                         TYPE_VECTOR_SUBPARTS (vectype))
    6315        32668 :            || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
    6316        16328 :       if (incompatible_op1_vectype_p
    6317            7 :           && (SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
    6318            1 :               || slp_op1->refcnt != 1))
    6319              :         {
    6320            6 :           if (dump_enabled_p ())
    6321            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6322              :                              "unusable type for last operand in"
    6323              :                              " vector/vector shift/rotate.\n");
    6324            6 :           return false;
    6325              :         }
    6326              :     }
    6327              :   /* See if the machine has a vector shifted by scalar insn and if not
    6328              :      then see if it has a vector shifted by vector insn.  */
    6329              :   else
    6330              :     {
    6331        48096 :       optab = optab_for_tree_code (code, vectype, optab_scalar);
    6332        48096 :       if (optab
    6333        48096 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    6334              :         {
    6335        48096 :           if (dump_enabled_p ())
    6336         4946 :             dump_printf_loc (MSG_NOTE, vect_location,
    6337              :                              "vector/scalar shift/rotate found.\n");
    6338              :         }
    6339              :       else
    6340              :         {
    6341            0 :           optab = optab_for_tree_code (code, vectype, optab_vector);
    6342            0 :           if (optab
    6343            0 :               && can_implement_p (optab, TYPE_MODE (vectype)))
    6344              :             {
    6345            0 :               scalar_shift_arg = false;
    6346              : 
    6347            0 :               if (dump_enabled_p ())
    6348            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    6349              :                                  "vector/vector shift/rotate found.\n");
    6350              : 
    6351            0 :               if (!op1_vectype)
    6352            0 :                 op1_vectype = get_vectype_for_scalar_type (vinfo,
    6353            0 :                                                            TREE_TYPE (op1),
    6354              :                                                            slp_op1);
    6355              : 
    6356              :               /* Unlike the other binary operators, shifts/rotates have
    6357              :                  the rhs being int, instead of the same type as the lhs,
    6358              :                  so make sure the scalar is the right type if we are
    6359              :                  dealing with vectors of long long/long/short/char.  */
    6360            0 :               incompatible_op1_vectype_p
    6361            0 :                 = (!op1_vectype
    6362            0 :                    || !tree_nop_conversion_p (TREE_TYPE (vectype),
    6363            0 :                                               TREE_TYPE (op1)));
    6364            0 :               if (incompatible_op1_vectype_p
    6365            0 :                   && dt[1] == vect_internal_def)
    6366              :                 {
    6367            0 :                   if (dump_enabled_p ())
    6368            0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6369              :                                      "unusable type for last operand in"
    6370              :                                      " vector/vector shift/rotate.\n");
    6371            0 :                   return false;
    6372              :                 }
    6373              :             }
    6374              :         }
    6375              :     }
    6376              : 
    6377              :   /* Supportable by target?  */
    6378        64425 :   if (!optab)
    6379              :     {
    6380            0 :       if (dump_enabled_p ())
    6381            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6382              :                          "no shift optab for %s and %T.\n",
    6383              :                          get_tree_code_name (code), vectype);
    6384            0 :       return false;
    6385              :     }
    6386        64425 :   vec_mode = TYPE_MODE (vectype);
    6387        64425 :   icode = (int) optab_handler (optab, vec_mode);
    6388        64425 :   if (icode == CODE_FOR_nothing)
    6389              :     {
    6390         6110 :       if (dump_enabled_p ())
    6391          900 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6392              :                          "shift op not supported by target.\n");
    6393         6110 :       return false;
    6394              :     }
    6395              :   /* vector lowering cannot optimize vector shifts using word arithmetic.  */
    6396        58315 :   if (vect_emulated_vector_p (vectype))
    6397              :     return false;
    6398              : 
    6399        58315 :   if (cost_vec) /* transformation not required.  */
    6400              :     {
                              /* Operand 0's SLP node is always updated with VECTYPE.
                                 Operand 1's SLP node is only updated when the shift amount
                                 is not a scalar or is an internal def, and its vector type
                                 is compatible or it is a constant; a constant with an
                                 incompatible type gets VECTYPE instead of OP1_VECTYPE.  */
    6401        49755 :       if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
    6402        49755 :           || ((!scalar_shift_arg || dt[1] == vect_internal_def)
    6403         8077 :               && (!incompatible_op1_vectype_p
    6404            1 :                   || dt[1] == vect_constant_def)
    6405         8077 :               && !vect_maybe_update_slp_op_vectype
    6406         8077 :                          (slp_op1,
    6407              :                           incompatible_op1_vectype_p ? vectype : op1_vectype)))
    6408              :         {
    6409            0 :           if (dump_enabled_p ())
    6410            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6411              :                              "incompatible vector types for invariants\n");
    6412            0 :           return false;
    6413              :         }
    6414              :       /* Now adjust the constant shift amount in place.  */
    6415        49755 :       if (incompatible_op1_vectype_p
    6416            1 :           && dt[1] == vect_constant_def)
    6417            4 :         for (unsigned i = 0;
    6418            5 :              i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
    6419              :           {
    6420            4 :             SLP_TREE_SCALAR_OPS (slp_op1)[i]
    6421            4 :               = fold_convert (TREE_TYPE (vectype),
    6422              :                               SLP_TREE_SCALAR_OPS (slp_op1)[i]);
    6423            4 :             gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
    6424              :                          == INTEGER_CST));
    6425              :           }
    6426        49755 :       SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
    6427        49755 :       DUMP_VECT_SCOPE ("vectorizable_shift");
    6428        49755 :       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
    6429        49755 :       return true;
    6430              :     }
    6431              : 
    6432              :   /* Transform.  */
    6433              : 
    6434         8560 :   if (dump_enabled_p ())
    6435         2033 :     dump_printf_loc (MSG_NOTE, vect_location,
    6436              :                      "transform binary/unary operation.\n");
    6437              : 
    6438              :   /* Handle def.  */
    6439         8560 :   vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6440              : 
    6441         8560 :   unsigned nvectors = vect_get_num_copies (vinfo, slp_node);
    6442         8560 :   if (scalar_shift_arg && dt[1] != vect_internal_def)
    6443              :     {
    6444              :       /* Vector shl and shr insn patterns can be defined with scalar
    6445              :          operand 2 (shift operand).  In this case, use constant or loop
    6446              :          invariant op1 directly, without extending it to vector mode
    6447              :          first.  */
    6448         6398 :       optab_op2_mode = insn_data[icode].operand[2].mode;
    6449         6398 :       if (!VECTOR_MODE_P (optab_op2_mode))
    6450              :         {
    6451         6398 :           if (dump_enabled_p ())
    6452         1918 :             dump_printf_loc (MSG_NOTE, vect_location,
    6453              :                              "operand 1 using scalar mode.\n");
    6454         6398 :           vec_oprnd1 = op1;
    6455         6398 :           vec_oprnds1.create (nvectors);
    6456         6398 :           vec_oprnds1.quick_push (vec_oprnd1);
    6457              :           /* Store vec_oprnd1 for every vector stmt to be created.
    6458              :              We check during the analysis that all the shift arguments
    6459              :              are the same.
    6460              :              TODO: Allow different constants for different vector
    6461              :              stmts generated for an SLP instance.  */
    6462        14865 :           for (k = 0; k < nvectors - 1; k++)
    6463         2069 :             vec_oprnds1.quick_push (vec_oprnd1);
    6464              :         }
    6465              :     }
    6466         2162 :   else if (!scalar_shift_arg && incompatible_op1_vectype_p)
    6467              :     {
    6468            0 :       if (was_scalar_shift_arg)
    6469              :         {
    6470              :           /* If the argument was the same in all lanes create the
    6471              :              correctly typed vector shift amount directly.  Note
    6472              :              we made SLP scheduling think we use the original scalars,
    6473              :              so place the compensation code next to the shift which
    6474              :              is conservative.  See PR119640 where it otherwise breaks.  */
    6475            0 :           op1 = fold_convert (TREE_TYPE (vectype), op1);
    6476            0 :           op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
    6477              :                                   gsi);
    6478            0 :           vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
    6479              :                                          gsi);
    6480            0 :           vec_oprnds1.create (nvectors);
    6481            0 :           for (k = 0; k < nvectors; k++)
    6482            0 :             vec_oprnds1.quick_push (vec_oprnd1);
    6483              :         }
    6484            0 :       else if (dt[1] == vect_constant_def)
    6485              :         /* The constant shift amount has been adjusted in place.  */
    6486              :         ;
    6487              :       else
    6488            0 :         gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
    6489              :     }
    6490              : 
    6491              :   /* vec_oprnd1 is available if operand 1 should be of a scalar-type
    6492              :      (a special case for certain kind of vector shifts); otherwise,
    6493              :      operand 1 should be of a vector type (the usual case).  */
    6494         2162 :   vect_get_vec_defs (vinfo, slp_node,
    6495              :                      op0, &vec_oprnds0,
    6496         8560 :                      vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
    6497              : 
    6498              :   /* Arguments are ready.  Create the new vector stmt.  */
    6499        22691 :   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    6500              :     {
    6501              :       /* For internal defs where we need to use a scalar shift arg
    6502              :          extract the first lane.  */
    6503        14131 :       if (scalar_shift_arg && dt[1] == vect_internal_def)
    6504              :         {
    6505           10 :           vop1 = vec_oprnds1[0];
    6506           10 :           new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
    6507           10 :           gassign *new_stmt
    6508           10 :             = gimple_build_assign (new_temp,
    6509           10 :                                    build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
    6510              :                                            vop1,
    6511           10 :                                            TYPE_SIZE (TREE_TYPE (new_temp)),
    6512              :                                            bitsize_zero_node));
    6513           10 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    6514           10 :           vop1 = new_temp;
    6515           10 :         }
    6516              :       else
    6517        14121 :         vop1 = vec_oprnds1[i];
    6518        14131 :       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
    6519        14131 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6520        14131 :       gimple_assign_set_lhs (new_stmt, new_temp);
    6521        14131 :       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    6522        14131 :       slp_node->push_vec_def (new_stmt);
    6523              :     }
    6524              : 
    6525         8560 :   vec_oprnds0.release ();
    6526         8560 :   vec_oprnds1.release ();
    6527              : 
    6528         8560 :   return true;
    6529              : }
    6530              : 
    6531              : /* Function vectorizable_operation.
    6532              : 
    6533              :    Check if STMT_INFO performs a binary, unary or ternary operation that can
    6534              :    be vectorized.
    6535              :    If COST_VEC is passed, calculate costs but don't change anything,
    6536              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    6537              :    it, and insert it at GSI.
    6538              :    Return true if STMT_INFO is vectorizable in this way.  */
    6539              : 
    6540              : static bool
    6541      2687426 : vectorizable_operation (vec_info *vinfo,
    6542              :                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    6543              :                         slp_tree slp_node,
    6544              :                         stmt_vector_for_cost *cost_vec)
    6545              : {
    6546      2687426 :   tree vec_dest;
    6547      2687426 :   tree scalar_dest;
    6548      2687426 :   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
    6549      2687426 :   tree vectype;
    6550      2687426 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6551      2687426 :   enum tree_code code, orig_code;
    6552      2687426 :   machine_mode vec_mode;
    6553      2687426 :   tree new_temp;
    6554      2687426 :   int op_type;
    6555      2687426 :   optab optab;
    6556      2687426 :   bool target_support_p;
    6557      2687426 :   enum vect_def_type dt[3]
    6558              :     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
    6559      2687426 :   poly_uint64 nunits_in;
    6560      2687426 :   poly_uint64 nunits_out;
    6561      2687426 :   tree vectype_out;
    6562      2687426 :   int i;
    6563      2687426 :   vec<tree> vec_oprnds0 = vNULL;
    6564      2687426 :   vec<tree> vec_oprnds1 = vNULL;
    6565      2687426 :   vec<tree> vec_oprnds2 = vNULL;
    6566      2687426 :   tree vop0, vop1, vop2;
    6567      2687426 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    6568              : 
    6569      2687426 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    6570              :     return false;
    6571              : 
    6572      2687426 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    6573       234683 :       && cost_vec)
    6574              :     return false;
    6575              : 
    6576              :   /* Is STMT a vectorizable binary/unary operation?   */
    6577      4416646 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
    6578      2380627 :   if (!stmt)
    6579              :     return false;
    6580              : 
    6581              :   /* Loads and stores are handled in vectorizable_{load,store}.  */
    6582      2380627 :   if (STMT_VINFO_DATA_REF (stmt_info))
    6583              :     return false;
    6584              : 
    6585      1027495 :   orig_code = code = gimple_assign_rhs_code (stmt);
    6586              : 
    6587              :   /* Shifts are handled in vectorizable_shift.  */
    6588      1027495 :   if (code == LSHIFT_EXPR
    6589              :       || code == RSHIFT_EXPR
    6590              :       || code == LROTATE_EXPR
    6591      1027495 :       || code == RROTATE_EXPR)
    6592              :    return false;
    6593              : 
    6594              :   /* Comparisons are handled in vectorizable_comparison.  */
    6595       971618 :   if (TREE_CODE_CLASS (code) == tcc_comparison)
    6596              :     return false;
    6597              : 
    6598              :   /* Conditions are handled in vectorizable_condition.  */
    6599       787542 :   if (code == COND_EXPR)
    6600              :     return false;
    6601              : 
    6602              :   /* For pointer addition and subtraction, we should use the normal
    6603              :      plus and minus for the vector operation.  */
    6604       761193 :   if (code == POINTER_PLUS_EXPR)
    6605              :     code = PLUS_EXPR;
    6606       742591 :   if (code == POINTER_DIFF_EXPR)
    6607          945 :     code = MINUS_EXPR;
    6608              : 
    6609              :   /* Support only unary, binary or ternary operations.  */
    6610       761193 :   op_type = TREE_CODE_LENGTH (code);
    6611       761193 :   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    6612              :     {
    6613            0 :       if (dump_enabled_p ())
    6614            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6615              :                          "num. args = %d (not unary/binary/ternary op).\n",
    6616              :                          op_type);
    6617            0 :       return false;
    6618              :     }
    6619              : 
    6620       761193 :   scalar_dest = gimple_assign_lhs (stmt);
    6621       761193 :   vectype_out = SLP_TREE_VECTYPE (slp_node);
    6622              : 
    6623              :   /* Most operations cannot handle bit-precision types without extra
    6624              :      truncations.  */
    6625       761193 :   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
    6626       750151 :   if (!mask_op_p
    6627       750151 :       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
    6628              :       /* Exceptions are bitwise binary operations.  */
    6629              :       && code != BIT_IOR_EXPR
    6630         1400 :       && code != BIT_XOR_EXPR
    6631          894 :       && code != BIT_AND_EXPR)
    6632              :     {
    6633          690 :       if (dump_enabled_p ())
    6634            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6635              :                          "bit-precision arithmetic not supported.\n");
    6636          690 :       return false;
    6637              :     }
    6638              : 
    6639       760503 :   slp_tree slp_op0;
    6640       760503 :   if (!vect_is_simple_use (vinfo, slp_node,
    6641              :                            0, &op0, &slp_op0, &dt[0], &vectype))
    6642              :     {
    6643            0 :       if (dump_enabled_p ())
    6644            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6645              :                          "use not simple.\n");
    6646            0 :       return false;
    6647              :     }
    6648       760503 :   bool is_invariant = (dt[0] == vect_external_def
    6649       760503 :                        || dt[0] == vect_constant_def);
    6650              :   /* If op0 is an external or constant def, infer the vector type
    6651              :      from the scalar type.  */
    6652       760503 :   if (!vectype)
    6653              :     {
    6654              :       /* For boolean type we cannot determine vectype by
    6655              :          invariant value (don't know whether it is a vector
    6656              :          of booleans or vector of integers).  We use output
    6657              :          vectype because operations on boolean don't change
    6658              :          type.  */
    6659        66915 :       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
    6660              :         {
    6661         1418 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
    6662              :             {
    6663          228 :               if (dump_enabled_p ())
    6664            0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6665              :                                  "not supported operation on bool value.\n");
    6666          228 :               return false;
    6667              :             }
    6668         1190 :           vectype = vectype_out;
    6669              :         }
    6670              :       else
    6671        65497 :         vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
    6672              :                                                slp_node);
    6673              :     }
    6674       760275 :   if (!cost_vec)
    6675       114588 :     gcc_assert (vectype);
    6676       760275 :   if (!vectype)
    6677              :     {
    6678          290 :       if (dump_enabled_p ())
    6679            2 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6680              :                          "no vectype for scalar type %T\n",
    6681            2 :                          TREE_TYPE (op0));
    6682              : 
    6683          290 :       return false;
    6684              :     }
    6685              : 
    6686       759985 :   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    6687       759985 :   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
    6688       759985 :   if (maybe_ne (nunits_out, nunits_in)
    6689       759985 :       || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
    6690        11731 :     return false;
    6691              : 
    6692       748254 :   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
    6693       748254 :   slp_tree slp_op1 = NULL, slp_op2 = NULL;
    6694       748254 :   if (op_type == binary_op || op_type == ternary_op)
    6695              :     {
    6696       669385 :       if (!vect_is_simple_use (vinfo, slp_node,
    6697              :                                1, &op1, &slp_op1, &dt[1], &vectype2))
    6698              :         {
    6699            0 :           if (dump_enabled_p ())
    6700            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6701              :                              "use not simple.\n");
    6702            0 :           return false;
    6703              :         }
    6704       669385 :       is_invariant &= (dt[1] == vect_external_def
    6705       669385 :                        || dt[1] == vect_constant_def);
    6706       669385 :       if (vectype2
    6707      1136738 :           && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
    6708       467353 :               || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
    6709       467353 :                                          TREE_TYPE (vectype2))))
    6710            4 :         return false;
    6711              :     }
    6712       748250 :   if (op_type == ternary_op)
    6713              :     {
    6714            0 :       if (!vect_is_simple_use (vinfo, slp_node,
    6715              :                                2, &op2, &slp_op2, &dt[2], &vectype3))
    6716              :         {
    6717            0 :           if (dump_enabled_p ())
    6718            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6719              :                              "use not simple.\n");
    6720            0 :           return false;
    6721              :         }
    6722            0 :       is_invariant &= (dt[2] == vect_external_def
    6723            0 :                        || dt[2] == vect_constant_def);
    6724            0 :       if (vectype3
    6725            0 :           && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
    6726            0 :               || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
    6727            0 :                                          TREE_TYPE (vectype3))))
    6728            0 :         return false;
    6729              :     }
    6730              : 
    6731              :   /* Multiple types in SLP are handled by creating the appropriate number of
    6732              :      vectorized stmts for each SLP node.  */
    6733       748250 :   auto vec_num = vect_get_num_copies (vinfo, slp_node);
    6734              : 
    6735              :   /* Reject attempts to combine mask types with nonmask types, e.g. if
    6736              :      we have an AND between a (nonmask) boolean loaded from memory and
    6737              :      a (mask) boolean result of a comparison.
    6738              : 
    6739              :      TODO: We could easily fix these cases up using pattern statements.  */
    6740       748250 :   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
    6741      1207654 :       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
    6742      1496500 :       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
    6743              :     {
    6744            0 :       if (dump_enabled_p ())
    6745            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6746              :                          "mixed mask and nonmask vector types\n");
    6747            0 :       return false;
    6748              :     }
    6749              : 
    6750              :   /* Supportable by target?  */
    6751              : 
    6752       748250 :   vec_mode = TYPE_MODE (vectype);
    6753       748250 :   optab = optab_for_tree_code (code, vectype, optab_default);
    6754       748250 :   if (!optab)
    6755              :     {
    6756        68451 :       if (dump_enabled_p ())
    6757         5961 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6758              :                          "no optab for %s and %T.\n",
    6759              :                          get_tree_code_name (code), vectype);
    6760        68451 :       return false;
    6761              :     }
    6762       679799 :   target_support_p = can_implement_p (optab, vec_mode);
    6763              : 
    6764       679799 :   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
    6765       679799 :   if (!target_support_p || using_emulated_vectors_p)
    6766              :     {
    6767        28815 :       if (dump_enabled_p ())
    6768         1124 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6769              :                          "op not supported by target.\n");
    6770              :       /* When vec_mode is not a vector mode, ops that we verified need
    6771              :          no lowering (like AND) are natively supported, so let those
    6772              :          through even when the mode isn't word_mode.  For ops we have
    6773              :          to lower, the lowering code assumes we are dealing with
    6774              :          word_mode.  */
    6775        57630 :       if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
    6776        28673 :           || !GET_MODE_SIZE (vec_mode).is_constant ()
    6777        28673 :           || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
    6778        23691 :                || !target_support_p)
    6779        60880 :               && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
    6780              :           /* Check only during analysis.  */
    6781        40564 :           || (cost_vec && !vect_can_vectorize_without_simd_p (code)))
    6782              :         {
    6783        28253 :           if (dump_enabled_p ())
    6784         1122 :             dump_printf (MSG_NOTE, "using word mode not possible.\n");
    6785        28253 :           return false;
    6786              :         }
    6787          562 :       if (dump_enabled_p ())
    6788            2 :         dump_printf_loc (MSG_NOTE, vect_location,
    6789              :                          "proceeding using word mode.\n");
    6790              :       using_emulated_vectors_p = true;
    6791              :     }
    6792              : 
    6793       651546 :   int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
    6794       651546 :   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
    6795       433464 :   vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
    6796       651546 :   internal_fn cond_fn = get_conditional_internal_fn (code);
    6797       651546 :   internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
    6798              : 
    6799              :   /* If operating on inactive elements could generate spurious traps,
    6800              :      we need to restrict the operation to active lanes.  Note that this
    6801              :      specifically doesn't apply to unhoisted invariants, since they
    6802              :      operate on the same value for every lane.
    6803              : 
    6804              :      Similarly, if this operation is part of a reduction, a fully-masked
    6805              :      loop should only change the active lanes of the reduction chain,
    6806              :      keeping the inactive lanes as-is.  */
    6807       624939 :   bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
    6808      1212949 :                             || reduc_idx >= 0);
    6809              : 
    6810       651546 :   if (cost_vec) /* transformation not required.  */
    6811              :     {
    6812       536958 :       if (loop_vinfo
    6813       330247 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
    6814        88784 :           && mask_out_inactive)
    6815              :         {
    6816        20416 :           if (cond_len_fn != IFN_LAST
    6817        20416 :               && direct_internal_fn_supported_p (cond_len_fn, vectype,
    6818              :                                                  OPTIMIZE_FOR_SPEED))
    6819            0 :             vect_record_loop_len (loop_vinfo, lens, vec_num, vectype,
    6820              :                                   1);
    6821        20416 :           else if (cond_fn != IFN_LAST
    6822        20416 :                    && direct_internal_fn_supported_p (cond_fn, vectype,
    6823              :                                                       OPTIMIZE_FOR_SPEED))
    6824         8514 :             vect_record_loop_mask (loop_vinfo, masks, vec_num,
    6825              :                                    vectype, NULL);
    6826              :           else
    6827              :             {
    6828        11902 :               if (dump_enabled_p ())
    6829          610 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6830              :                                  "can't use a fully-masked loop because no"
    6831              :                                  " conditional operation is available.\n");
    6832        11902 :               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    6833              :             }
    6834              :         }
    6835              : 
    6836              :       /* Put types on constant and invariant SLP children.  */
    6837       536958 :       if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
    6838       536912 :           || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
    6839      1073777 :           || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))
    6840              :         {
    6841          139 :           if (dump_enabled_p ())
    6842            3 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6843              :                              "incompatible vector types for invariants\n");
    6844          139 :           return false;
    6845              :         }
    6846              : 
    6847       536819 :       SLP_TREE_TYPE (slp_node) = op_vec_info_type;
    6848       536819 :       DUMP_VECT_SCOPE ("vectorizable_operation");
    6849       536819 :       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
    6850       536819 :       if (using_emulated_vectors_p)
    6851              :         {
    6852              :           /* The above vect_model_simple_cost call handles constants
    6853              :              in the prologue and (mis-)costs one of the stmts as
    6854              :              vector stmt.  See below for the actual lowering that will
    6855              :              be applied.  */
    6856          560 :           unsigned n = vect_get_num_copies (vinfo, slp_node);
    6857          560 :           switch (code)
    6858              :             {
    6859          201 :             case PLUS_EXPR:
    6860          201 :               n *= 5;
    6861          201 :               break;
    6862          328 :             case MINUS_EXPR:
    6863          328 :               n *= 6;
    6864          328 :               break;
    6865            0 :             case NEGATE_EXPR:
    6866            0 :               n *= 4;
    6867            0 :               break;
    6868              :             default:
    6869              :               /* Bit operations do not have extra cost and are accounted
    6870              :                  as vector stmt by vect_model_simple_cost.  */
    6871              :               n = 0;
    6872              :               break;
    6873              :             }
    6874          529 :           if (n != 0)
    6875              :             {
    6876              :               /* We also need to materialize two large constants.  */
    6877          529 :               record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
    6878              :                                 0, vect_prologue);
    6879          529 :               record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
    6880              :                                 0, vect_body);
    6881              :             }
    6882              :         }
    6883       536819 :       return true;
    6884              :     }
    6885              : 
    6886              :   /* Transform.  */
    6887              : 
    6888       114588 :   if (dump_enabled_p ())
    6889        16486 :     dump_printf_loc (MSG_NOTE, vect_location,
    6890              :                      "transform binary/unary operation.\n");
    6891              : 
    6892       114588 :   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
    6893       103217 :   bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
    6894              : 
    6895              :   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
    6896              :      vectors with unsigned elements, but the result is signed.  So, we
    6897              :      need to compute the MINUS_EXPR into vectype temporary and
    6898              :      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
    6899       114588 :   tree vec_cvt_dest = NULL_TREE;
    6900       114588 :   if (orig_code == POINTER_DIFF_EXPR)
    6901              :     {
    6902          110 :       vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6903          110 :       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    6904              :     }
    6905              :   /* For reduction operations with undefined overflow behavior make sure to
    6906              :      pun them to unsigned since we change the order of evaluation.
    6907              :      ???  Avoid for in-order reductions?  */
    6908       114478 :   else if (arith_code_with_undefined_signed_overflow (orig_code)
    6909        97852 :            && ANY_INTEGRAL_TYPE_P (vectype)
    6910        47782 :            && TYPE_OVERFLOW_UNDEFINED (vectype)
    6911       140087 :            && SLP_TREE_REDUC_IDX (slp_node) != -1)
    6912              :     {
    6913         2465 :       gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
    6914              :                   || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
    6915         2465 :       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    6916         2465 :       vectype = unsigned_type_for (vectype);
    6917         2465 :       vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6918              :     }
    6919              :   /* Handle def.  */
    6920              :   else
    6921       112013 :     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
    6922              : 
    6923       114588 :   vect_get_vec_defs (vinfo, slp_node,
    6924              :                      op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
    6925              :   /* Arguments are ready.  Create the new vector stmt.  */
    6926       253441 :   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    6927              :     {
    6928       138853 :       gimple *new_stmt = NULL;
    6929       277706 :       vop1 = ((op_type == binary_op || op_type == ternary_op)
    6930       138853 :               ? vec_oprnds1[i] : NULL_TREE);
    6931       138853 :       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
    6932              : 
    6933       138853 :       if (vec_cvt_dest
    6934       138853 :           && !useless_type_conversion_p (vectype, TREE_TYPE (vop0)))
    6935              :         {
    6936         2918 :           new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0);
    6937         2918 :           new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
    6938              :                                           new_temp);
    6939         2918 :           new_temp = make_ssa_name (vec_dest, new_stmt);
    6940         2918 :           gimple_assign_set_lhs (new_stmt, new_temp);
    6941         2918 :           vect_finish_stmt_generation (vinfo, stmt_info,
    6942              :                                        new_stmt, gsi);
    6943         2918 :           vop0 = new_temp;
    6944              :         }
    6945       138853 :       if (vop1
    6946       136294 :           && vec_cvt_dest
    6947       141896 :           && !useless_type_conversion_p (vectype, TREE_TYPE (vop1)))
    6948              :         {
    6949         2918 :           new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1);
    6950         2918 :           new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
    6951              :                                           new_temp);
    6952         2918 :           new_temp = make_ssa_name (vec_dest, new_stmt);
    6953         2918 :           gimple_assign_set_lhs (new_stmt, new_temp);
    6954         2918 :           vect_finish_stmt_generation (vinfo, stmt_info,
    6955              :                                        new_stmt, gsi);
    6956         2918 :           vop1 = new_temp;
    6957              :         }
    6958       138853 :       if (vop2
    6959            0 :           && vec_cvt_dest
    6960       138853 :           && !useless_type_conversion_p (vectype, TREE_TYPE (vop2)))
    6961              :         {
    6962            0 :           new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2);
    6963            0 :           new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
    6964              :                                           new_temp);
    6965            0 :           new_temp = make_ssa_name (vec_dest, new_stmt);
    6966            0 :           gimple_assign_set_lhs (new_stmt, new_temp);
    6967            0 :           vect_finish_stmt_generation (vinfo, stmt_info,
    6968              :                                        new_stmt, gsi);
    6969            0 :           vop2 = new_temp;
    6970              :         }
    6971              : 
    6972       138853 :       if (using_emulated_vectors_p)
    6973              :         {
    6974              :           /* Lower the operation.  This follows vector lowering.  */
    6975            2 :           tree word_type = build_nonstandard_integer_type
    6976            2 :                              (GET_MODE_BITSIZE (vec_mode).to_constant (), 1);
    6977            2 :           tree wvop0 = make_ssa_name (word_type);
    6978            2 :           new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
    6979              :                                           build1 (VIEW_CONVERT_EXPR,
    6980              :                                                   word_type, vop0));
    6981            2 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    6982            2 :           tree wvop1 = NULL_TREE;
    6983            2 :           if (vop1)
    6984              :             {
    6985            2 :               wvop1 = make_ssa_name (word_type);
    6986            2 :               new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
    6987              :                                               build1 (VIEW_CONVERT_EXPR,
    6988              :                                                       word_type, vop1));
    6989            2 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    6990              :             }
    6991              : 
    6992            2 :           tree result_low;
    6993            2 :           if (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
    6994              :             {
    6995            1 :               unsigned int width = vector_element_bits (vectype);
    6996            1 :               tree inner_type = TREE_TYPE (vectype);
    6997            1 :               HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
    6998            1 :               tree low_bits
    6999            1 :                 = build_replicated_int_cst (word_type, width, max >> 1);
    7000            1 :               tree high_bits
    7001            2 :                 = build_replicated_int_cst (word_type,
    7002            1 :                                             width, max & ~(max >> 1));
    7003            1 :               tree signs;
    7004            1 :               if (code == PLUS_EXPR || code == MINUS_EXPR)
    7005              :                 {
    7006            1 :                   signs = make_ssa_name (word_type);
    7007            1 :                   new_stmt = gimple_build_assign (signs,
    7008              :                                                   BIT_XOR_EXPR, wvop0, wvop1);
    7009            1 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7010            1 :                   tree b_low = make_ssa_name (word_type);
    7011            1 :                   new_stmt = gimple_build_assign (b_low, BIT_AND_EXPR,
    7012              :                                                   wvop1, low_bits);
    7013            1 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7014            1 :                   tree a_low = make_ssa_name (word_type);
    7015            1 :                   if (code == PLUS_EXPR)
    7016            1 :                     new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
    7017              :                                                     wvop0, low_bits);
    7018              :                   else
    7019            0 :                     new_stmt = gimple_build_assign (a_low, BIT_IOR_EXPR,
    7020              :                                                     wvop0, high_bits);
    7021            1 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7022            1 :                   if (code == MINUS_EXPR)
    7023              :                     {
    7024            0 :                       new_stmt = gimple_build_assign (NULL_TREE,
    7025              :                                                       BIT_NOT_EXPR, signs);
    7026            0 :                       signs = make_ssa_name (word_type);
    7027            0 :                       gimple_assign_set_lhs (new_stmt, signs);
    7028            0 :                       vect_finish_stmt_generation (vinfo, stmt_info,
    7029              :                                                    new_stmt, gsi);
    7030              :                     }
    7031            1 :                   new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
    7032              :                                                   signs, high_bits);
    7033            1 :                   signs = make_ssa_name (word_type);
    7034            1 :                   gimple_assign_set_lhs (new_stmt, signs);
    7035            1 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7036            1 :                   result_low = make_ssa_name (word_type);
    7037            1 :                   new_stmt = gimple_build_assign (result_low, code,
    7038              :                                                   a_low, b_low);
    7039            1 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7040              :                 }
    7041              :               else /* if (code == NEGATE_EXPR) */
    7042              :                 {
    7043            0 :                   tree a_low = make_ssa_name (word_type);
    7044            0 :                   new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
    7045              :                                                   wvop0, low_bits);
    7046            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7047            0 :                   signs = make_ssa_name (word_type);
    7048            0 :                   new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
    7049            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7050            0 :                   new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
    7051              :                                                   signs, high_bits);
    7052            0 :                   signs = make_ssa_name (word_type);
    7053            0 :                   gimple_assign_set_lhs (new_stmt, signs);
    7054            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7055            0 :                   result_low = make_ssa_name (word_type);
    7056            0 :                   new_stmt = gimple_build_assign (result_low,
    7057              :                                                   MINUS_EXPR, high_bits, a_low);
    7058            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7059              :                 }
    7060            1 :               new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR,
    7061              :                                               result_low, signs);
    7062            1 :               result_low = make_ssa_name (word_type);
    7063            1 :               gimple_assign_set_lhs (new_stmt, result_low);
    7064            1 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7065              :             }
    7066              :           else
    7067              :             {
    7068            1 :               new_stmt = gimple_build_assign (NULL_TREE, code, wvop0, wvop1);
    7069            1 :               result_low = make_ssa_name (word_type);
    7070            1 :               gimple_assign_set_lhs (new_stmt, result_low);
    7071            1 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7072              : 
    7073              :             }
    7074            2 :           new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
    7075              :                                           build1 (VIEW_CONVERT_EXPR,
    7076              :                                                   vectype, result_low));
    7077            2 :           new_temp = make_ssa_name (vectype);
    7078            2 :           gimple_assign_set_lhs (new_stmt, new_temp);
    7079            2 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7080              :         }
    7081       138851 :       else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
    7082              :         {
    7083           16 :           tree mask;
    7084           16 :           if (masked_loop_p)
    7085           16 :             mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
    7086              :                                        vec_num, vectype, i);
    7087              :           else
    7088              :             /* Dummy mask.  */
    7089            0 :             mask = build_minus_one_cst (truth_type_for (vectype));
    7090           16 :           auto_vec<tree> vops (6);
    7091           16 :           vops.quick_push (mask);
    7092           16 :           vops.quick_push (vop0);
    7093           16 :           if (vop1)
    7094           16 :             vops.quick_push (vop1);
    7095           16 :           if (vop2)
    7096            0 :             vops.quick_push (vop2);
    7097           16 :           if (reduc_idx >= 0)
    7098              :             {
    7099              :               /* Perform the operation on active elements only and take
    7100              :                  inactive elements from the reduction chain input.  */
    7101            8 :               gcc_assert (!vop2);
    7102            8 :               vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
    7103              :             }
    7104              :           else
    7105              :             {
    7106            8 :               auto else_value = targetm.preferred_else_value
    7107            8 :                 (cond_fn, vectype, vops.length () - 1, &vops[1]);
    7108            8 :               vops.quick_push (else_value);
    7109              :             }
    7110           16 :           if (len_loop_p)
    7111              :             {
    7112            0 :               tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
    7113            0 :                                             vec_num, vectype, i, 1, true);
    7114            0 :               signed char biasval
    7115            0 :                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
    7116            0 :               tree bias = build_int_cst (intQI_type_node, biasval);
    7117            0 :               vops.quick_push (len);
    7118            0 :               vops.quick_push (bias);
    7119              :             }
    7120           16 :           gcall *call
    7121           16 :             = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
    7122              :                                                             : cond_len_fn,
    7123              :                                               vops);
    7124           16 :           new_temp = make_ssa_name (vec_dest, call);
    7125           16 :           gimple_call_set_lhs (call, new_temp);
    7126           16 :           gimple_call_set_nothrow (call, true);
    7127           16 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    7128           16 :           new_stmt = call;
    7129           16 :         }
    7130              :       else
    7131              :         {
    7132       138835 :           tree mask = NULL_TREE;
    7133              :           /* When combining two masks check if either of them is elsewhere
    7134              :              combined with a loop mask, if that's the case we can mark that the
    7135              :              new combined mask doesn't need to be combined with a loop mask.  */
    7136       138835 :           if (masked_loop_p
    7137       138835 :               && code == BIT_AND_EXPR
    7138       138835 :               && VECTOR_BOOLEAN_TYPE_P (vectype))
    7139              :             {
    7140            8 :               if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, vec_num }))
    7141              :                 {
    7142            0 :                   mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
    7143              :                                              vec_num, vectype, i);
    7144              : 
    7145            0 :                   vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
    7146              :                                            vop0, gsi);
    7147              :                 }
    7148              : 
    7149            8 :               if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, vec_num }))
    7150              :                 {
    7151            0 :                   mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
    7152              :                                              vec_num, vectype, i);
    7153              : 
    7154            0 :                   vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
    7155              :                                            vop1, gsi);
    7156              :                 }
    7157              :             }
    7158              : 
    7159       138835 :           new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
    7160       138835 :           new_temp = make_ssa_name (vec_dest, new_stmt);
    7161       138835 :           gimple_assign_set_lhs (new_stmt, new_temp);
    7162       138835 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    7163       138835 :           if (using_emulated_vectors_p)
    7164              :             suppress_warning (new_stmt, OPT_Wvector_operation_performance);
    7165              : 
    7166              :           /* Enter the combined value into the vector cond hash so we don't
    7167              :              AND it with a loop mask again.  */
    7168       138835 :           if (mask)
    7169            0 :             loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
    7170              :         }
    7171              : 
    7172       138853 :       if (vec_cvt_dest)
    7173              :         {
    7174         3043 :           new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
    7175         3043 :           new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
    7176              :                                           new_temp);
    7177         3043 :           new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
    7178         3043 :           gimple_assign_set_lhs (new_stmt, new_temp);
    7179         3043 :           vect_finish_stmt_generation (vinfo, stmt_info,
    7180              :                                        new_stmt, gsi);
    7181              :         }
    7182              : 
    7183       138853 :       slp_node->push_vec_def (new_stmt);
    7184              :     }
    7185              : 
    7186       114588 :   vec_oprnds0.release ();
    7187       114588 :   vec_oprnds1.release ();
    7188       114588 :   vec_oprnds2.release ();
    7189              : 
    7190       114588 :   return true;
    7191              : }
    7192              : 
    7193              : /* A helper function to ensure data reference DR_INFO's base alignment: if the
    7194              :    base is flagged as misaligned, align its decl to DR_TARGET_ALIGNMENT.  */
    7195              : static void
    7196      1961757 : ensure_base_align (dr_vec_info *dr_info)
    7197              : {
    7198              :   /* Alignment is only analyzed for the first element of a DR group,
    7199              :      use that to look at base alignment we need to enforce.  */
    7200      1961757 :   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    7201      1426245 :     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
    7202              :   /* The misalignment must already have been computed at this point.  */
    7203      1961757 :   gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
    7204              : 
    7205      1961757 :   if (dr_info->base_misaligned)
    7206              :     {
    7207       169233 :       tree base_decl = dr_info->base_decl;
    7208              : 
    7209              :       // We should only be able to increase the alignment of a base object if
    7210              :       // we know what its new alignment should be at compile time.
    7211       169233 :       unsigned HOST_WIDE_INT align_base_to =
    7212       169233 :         DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
    7213              :       /* Decls in the symbol table go through the symtab node, others are set directly.  */
    7214       169233 :       if (decl_in_symtab_p (base_decl))
    7215         4729 :         symtab_node::get (base_decl)->increase_alignment (align_base_to);
    7216       164504 :       else if (DECL_ALIGN (base_decl) < align_base_to)
    7217              :         {
    7218       131511 :           SET_DECL_ALIGN (base_decl, align_base_to);
    7219       131511 :           DECL_USER_ALIGN (base_decl) = 1;
    7220              :         }
    7221       169233 :       dr_info->base_misaligned = false;  /* Handled; do not adjust again.  */
    7222              :     }
    7223      1961757 : }
    7224              : 
    7225              : 
    7226              : /* Function get_group_alias_ptr_type.
    7227              : 
    7228              :    Return the alias type for the group starting at FIRST_STMT_INFO:
    7229              :    ptr_type_node if alias sets differ within the group, else the first DR one.  */
    7230              : static tree
    7231      1631582 : get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
    7232              : {
    7233      1631582 :   struct data_reference *first_dr, *next_dr;
    7234              :   /* Compare the alias set of every later group member with that of the first.  */
    7235      1631582 :   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
    7236      1631582 :   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
    7237      3915464 :   while (next_stmt_info)
    7238              :     {
    7239      2416056 :       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
    7240      4832112 :       if (get_alias_set (DR_REF (first_dr))
    7241      2416056 :           != get_alias_set (DR_REF (next_dr)))
    7242              :         {
    7243       132174 :           if (dump_enabled_p ())
    7244           30 :             dump_printf_loc (MSG_NOTE, vect_location,
    7245              :                              "conflicting alias set types.\n");
    7246       132174 :           return ptr_type_node;
    7247              :         }
    7248      2283882 :       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    7249              :     }
    7250      1499408 :   return reference_alias_ptr_type (DR_REF (first_dr));
    7251              : }
    7252              : 
    7253              : 
    7254              : /* Function scan_operand_equal_p.
    7255              : 
    7256              :    Helper function for check_scan_store.  Compare two references
    7257              :    with .GOMP_SIMD_LANE bases.  Return true if REF1 and REF2 have equal bases
    7258              :    and bit sizes, and equal offsets (looking through multiplies and casts).  */
    7259              : static bool
    7260         1284 : scan_operand_equal_p (tree ref1, tree ref2)
    7261              : {
    7262         1284 :   tree ref[2] = { ref1, ref2 };
    7263         1284 :   poly_int64 bitsize[2], bitpos[2];
    7264              :   tree offset[2], base[2];
    7265         3852 :   for (int i = 0; i < 2; ++i)
    7266              :     {  /* Split ref[i] into base, bit size, bit position and offset.  */
    7267         2568 :       machine_mode mode;
    7268         2568 :       int unsignedp, reversep, volatilep = 0;
    7269         2568 :       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
    7270              :                                      &offset[i], &mode, &unsignedp,
    7271              :                                      &reversep, &volatilep);
    7272         2568 :       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
    7273            0 :         return false;
    7274         2568 :       if (TREE_CODE (base[i]) == MEM_REF
    7275           42 :           && offset[i] == NULL_TREE
    7276         2610 :           && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
    7277              :         {  /* Look through a MEM_REF of &VAR p+ SSA_NAME: use VAR and the SSA_NAME.  */
    7278           42 :           gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
    7279           42 :           if (is_gimple_assign (def_stmt)
    7280           42 :               && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
    7281           42 :               && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
    7282           84 :               && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
    7283              :             {
    7284           42 :               if (maybe_ne (mem_ref_offset (base[i]), 0))
    7285              :                 return false;
    7286           42 :               base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    7287           42 :               offset[i] = gimple_assign_rhs2 (def_stmt);
    7288              :             }
    7289              :         }
    7290              :     }
    7291              :   /* The decomposed bases and bit sizes have to agree.  */
    7292         1284 :   if (!operand_equal_p (base[0], base[1], 0))
    7293              :     return false;
    7294          934 :   if (maybe_ne (bitsize[0], bitsize[1]))
    7295              :     return false;
    7296          934 :   if (offset[0] != offset[1])
    7297              :     {
    7298          916 :       if (!offset[0] || !offset[1])
    7299              :         return false;
    7300          916 :       if (!operand_equal_p (offset[0], offset[1], 0))
    7301              :         {  /* Retry after peeling constant multipliers and non-narrowing casts.  */
    7302              :           tree step[2];
    7303            0 :           for (int i = 0; i < 2; ++i)
    7304              :             {
    7305            0 :               step[i] = integer_one_node;
    7306            0 :               if (TREE_CODE (offset[i]) == SSA_NAME)
    7307              :                 {
    7308            0 :                   gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
    7309            0 :                   if (is_gimple_assign (def_stmt)
    7310            0 :                       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
    7311            0 :                       && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
    7312              :                           == INTEGER_CST))
    7313              :                     {
    7314            0 :                       step[i] = gimple_assign_rhs2 (def_stmt);
    7315            0 :                       offset[i] = gimple_assign_rhs1 (def_stmt);
    7316              :                     }
    7317              :                 }
    7318            0 :               else if (TREE_CODE (offset[i]) == MULT_EXPR)
    7319              :                 {
    7320            0 :                   step[i] = TREE_OPERAND (offset[i], 1);
    7321            0 :                   offset[i] = TREE_OPERAND (offset[i], 0);
    7322              :                 }
    7323            0 :               tree rhs1 = NULL_TREE;
    7324            0 :               if (TREE_CODE (offset[i]) == SSA_NAME)
    7325              :                 {
    7326            0 :                   gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
    7327            0 :                   if (gimple_assign_cast_p (def_stmt))
    7328            0 :                     rhs1 = gimple_assign_rhs1 (def_stmt);
    7329              :                 }
    7330            0 :               else if (CONVERT_EXPR_P (offset[i]))
    7331            0 :                 rhs1 = TREE_OPERAND (offset[i], 0);
    7332            0 :               if (rhs1
    7333            0 :                   && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
    7334            0 :                   && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
    7335            0 :                   && (TYPE_PRECISION (TREE_TYPE (offset[i]))
    7336            0 :                       >= TYPE_PRECISION (TREE_TYPE (rhs1))))
    7337            0 :                 offset[i] = rhs1;
    7338              :             }
    7339            0 :           if (!operand_equal_p (offset[0], offset[1], 0)
    7340            0 :               || !operand_equal_p (step[0], step[1], 0))
    7341            0 :             return false;
    7342              :         }
    7343              :     }
    7344              :   return true;
    7345              : }
    7346              : 
    7347              : /* Operation used for one step of a scan store, see scan_store_can_perm_p.  */
    7348              : enum scan_store_kind {
    7349              :   /* Normal permutation.  */
    7350              :   scan_store_kind_perm,
    7351              : 
    7352              :   /* Whole vector left shift permutation with zero init.  */
    7353              :   scan_store_kind_lshift_zero,
    7354              : 
    7355              :   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
    7356              :   scan_store_kind_lshift_cond
    7357              : };
    7358              : 
    7359              : /* Function scan_store_can_perm_p.
    7360              : 
    7361              :    Verify if we can perform the needed permutations or whole vector shifts.
    7362              :    Return -1 on failure, otherwise exact log2 of vectype's nunits.
    7363              :    If non-NULL, USE_WHOLE_VECTOR receives the enum scan_store_kind to use at
    7364              :    each step; it is left empty when every step is a normal permutation.
    7365              :    Whole vector shifts need no VEC_COND_EXPR if INIT is a zero constant.  */
    7366              : static int
    7367         1024 : scan_store_can_perm_p (tree vectype, tree init,
    7368              :                        vec<enum scan_store_kind> *use_whole_vector = NULL)
    7369              : {
    7370         1024 :   enum machine_mode vec_mode = TYPE_MODE (vectype);
    7371         1024 :   unsigned HOST_WIDE_INT nunits;
    7372         1024 :   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    7373              :     return -1;
    7374         1024 :   int units_log2 = exact_log2 (nunits);
    7375         1024 :   if (units_log2 <= 0)
    7376              :     return -1;
    7377              :   /* Check one selector per step I < UNITS_LOG2 plus a final broadcast selector.  */
    7378              :   int i;
    7379              :   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
    7380         4784 :   for (i = 0; i <= units_log2; ++i)
    7381              :     {
    7382         3760 :       unsigned HOST_WIDE_INT j, k;
    7383         3760 :       enum scan_store_kind kind = scan_store_kind_perm;
    7384         3760 :       vec_perm_builder sel (nunits, nunits, 1);
    7385         3760 :       sel.quick_grow (nunits);
    7386         3760 :       if (i == units_log2)
    7387              :         {  /* Final selector: every lane reads the last lane of a single input.  */
    7388         9728 :           for (j = 0; j < nunits; ++j)
    7389         8704 :             sel[j] = nunits - 1;
    7390              :         }
    7391              :       else
    7392              :         {  /* First 2^I lanes are indices 0 .. 2^I - 1, the rest count up from NUNITS.  */
    7393        10416 :           for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
    7394         7680 :             sel[j] = j;
    7395        26416 :           for (k = 0; j < nunits; ++j, ++k)
    7396        23680 :             sel[j] = nunits + k;
    7397              :         }
    7398         6496 :       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
    7399         3760 :       if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
    7400              :         {
    7401            0 :           if (i == units_log2)
    7402              :             return -1;
    7403              :           /* Fall back to a whole vector shift; the shift kind is decided only once.  */
    7404            0 :           if (whole_vector_shift_kind == scan_store_kind_perm)
    7405              :             {
    7406            0 :               if (!can_implement_p (vec_shl_optab, vec_mode))
    7407              :                 return -1;
    7408            0 :               whole_vector_shift_kind = scan_store_kind_lshift_zero;
    7409              :               /* Whole vector shifts shift in zeros, so if init is all zero
    7410              :                  constant, there is no need to do anything further.  */
    7411            0 :               if ((TREE_CODE (init) != INTEGER_CST
    7412            0 :                    && TREE_CODE (init) != REAL_CST)
    7413            0 :                   || !initializer_zerop (init))
    7414              :                 {
    7415            0 :                   tree masktype = truth_type_for (vectype);
    7416            0 :                   if (!expand_vec_cond_expr_p (vectype, masktype))
    7417              :                     return -1;
    7418              :                   whole_vector_shift_kind = scan_store_kind_lshift_cond;
    7419              :                 }
    7420              :             }
    7421            0 :           kind = whole_vector_shift_kind;
    7422              :         }
    7423         3760 :       if (use_whole_vector)
    7424              :         {  /* Start recording at the first non-perm step, backfilling earlier steps.  */
    7425         1880 :           if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
    7426            0 :             use_whole_vector->safe_grow_cleared (i, true);
    7427         5640 :           if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
    7428            0 :             use_whole_vector->safe_push (kind);
    7429              :         }
    7430         3760 :     }
    7431              :   /* Every step can be implemented.  */
    7432              :   return units_log2;
    7433              : }
    7434              : 
    7435              : 
    7436              : /* Function check_scan_store.
    7437              : 
    7438              :    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
    7439              : 
    7440              : static bool
    7441         1076 : check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
    7442              :                   enum vect_def_type rhs_dt, slp_tree slp_node,
    7443              :                   slp_tree mask_node,
    7444              :                   vect_memory_access_type memory_access_type)
    7445              : {
    7446         1076 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    7447         1076 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    7448         1076 :   tree ref_type;
    7449              : 
    7450         1076 :   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
    7451         1076 :   if (SLP_TREE_LANES (slp_node) > 1
    7452         1076 :       || mask_node
    7453         1076 :       || memory_access_type != VMAT_CONTIGUOUS
    7454         1076 :       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
    7455         1076 :       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
    7456         1076 :       || loop_vinfo == NULL
    7457         1076 :       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
    7458         1076 :       || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    7459         1076 :       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
    7460         1076 :       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
    7461         1076 :       || !integer_zerop (DR_INIT (dr_info->dr))
    7462         1076 :       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
    7463         2152 :       || !alias_sets_conflict_p (get_alias_set (vectype),
    7464         1076 :                                  get_alias_set (TREE_TYPE (ref_type))))
    7465              :     {
    7466            0 :       if (dump_enabled_p ())
    7467            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    7468              :                          "unsupported OpenMP scan store.\n");
    7469            0 :       return false;
    7470              :     }
    7471              : 
    7472              :   /* We need to pattern match code built by OpenMP lowering and simplified
    7473              :      by following optimizations into something we can handle.
    7474              :      #pragma omp simd reduction(inscan,+:r)
    7475              :      for (...)
    7476              :        {
    7477              :          r += something ();
    7478              :          #pragma omp scan inclusive (r)
    7479              :          use (r);
    7480              :        }
    7481              :      shall have body with:
    7482              :        // Initialization for input phase, store the reduction initializer:
    7483              :        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
    7484              :        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
    7485              :        D.2042[_21] = 0;
    7486              :        // Actual input phase:
    7487              :        ...
    7488              :        r.0_5 = D.2042[_20];
    7489              :        _6 = _4 + r.0_5;
    7490              :        D.2042[_20] = _6;
    7491              :        // Initialization for scan phase:
    7492              :        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
    7493              :        _26 = D.2043[_25];
    7494              :        _27 = D.2042[_25];
    7495              :        _28 = _26 + _27;
    7496              :        D.2043[_25] = _28;
    7497              :        D.2042[_25] = _28;
    7498              :        // Actual scan phase:
    7499              :        ...
    7500              :        r.1_8 = D.2042[_20];
    7501              :        ...
    7502              :      The "omp simd array" variable D.2042 holds the privatized copy used
    7503              :      inside of the loop and D.2043 is another one that holds copies of
    7504              :      the current original list item.  The separate GOMP_SIMD_LANE ifn
    7505              :      kinds are there in order to allow optimizing the initializer store
    7506              :      and combiner sequence, e.g. if it is originally some C++ish user
    7507              :      defined reduction, but allow the vectorizer to pattern recognize it
    7508              :      and turn into the appropriate vectorized scan.
    7509              : 
    7510              :      For exclusive scan, this is slightly different:
    7511              :      #pragma omp simd reduction(inscan,+:r)
    7512              :      for (...)
    7513              :        {
    7514              :          use (r);
    7515              :          #pragma omp scan exclusive (r)
    7516              :          r += something ();
    7517              :        }
    7518              :      shall have body with:
    7519              :        // Initialization for input phase, store the reduction initializer:
    7520              :        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
    7521              :        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
    7522              :        D.2042[_21] = 0;
    7523              :        // Actual input phase:
    7524              :        ...
    7525              :        r.0_5 = D.2042[_20];
    7526              :        _6 = _4 + r.0_5;
    7527              :        D.2042[_20] = _6;
    7528              :        // Initialization for scan phase:
    7529              :        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
    7530              :        _26 = D.2043[_25];
    7531              :        D.2044[_25] = _26;
    7532              :        _27 = D.2042[_25];
    7533              :        _28 = _26 + _27;
    7534              :        D.2043[_25] = _28;
    7535              :        // Actual scan phase:
    7536              :        ...
    7537              :        r.1_8 = D.2044[_20];
    7538              :        ...  */
    7539              : 
    7540         1076 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
    7541              :     {
    7542              :       /* Match the D.2042[_21] = 0; store above.  Just require that
    7543              :          it is a constant or external definition store.  */
    7544          564 :       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
    7545              :         {
    7546            0 :          fail_init:
    7547            0 :           if (dump_enabled_p ())
    7548            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    7549              :                              "unsupported OpenMP scan initializer store.\n");
    7550            0 :           return false;
    7551              :         }
    7552              : 
    7553          564 :       if (! loop_vinfo->scan_map)
    7554          322 :         loop_vinfo->scan_map = new hash_map<tree, tree>;
    7555          564 :       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7556          564 :       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
    7557          564 :       if (cached)
    7558            0 :         goto fail_init;
    7559          564 :       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
    7560              : 
    7561              :       /* These stores can be vectorized normally.  */
    7562          564 :       return true;
    7563              :     }
    7564              : 
    7565          512 :   if (rhs_dt != vect_internal_def)
    7566              :     {
    7567            0 :      fail:
    7568            0 :       if (dump_enabled_p ())
    7569            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    7570              :                          "unsupported OpenMP scan combiner pattern.\n");
    7571            0 :       return false;
    7572              :     }
    7573              : 
    7574          512 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7575          512 :   tree rhs = gimple_assign_rhs1 (stmt);
    7576          512 :   if (TREE_CODE (rhs) != SSA_NAME)
    7577            0 :     goto fail;
    7578              : 
    7579          512 :   gimple *other_store_stmt = NULL;
    7580          512 :   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7581          512 :   bool inscan_var_store
    7582          512 :     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
    7583              : 
    7584          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    7585              :     {
    7586          252 :       if (!inscan_var_store)
    7587              :         {
    7588          126 :           use_operand_p use_p;
    7589          126 :           imm_use_iterator iter;
    7590          378 :           FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
    7591              :             {
    7592          252 :               gimple *use_stmt = USE_STMT (use_p);
    7593          252 :               if (use_stmt == stmt || is_gimple_debug (use_stmt))
    7594          126 :                 continue;
    7595          126 :               if (gimple_bb (use_stmt) != gimple_bb (stmt)
    7596          126 :                   || !is_gimple_assign (use_stmt)
    7597          126 :                   || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
    7598          126 :                   || other_store_stmt
    7599          252 :                   || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
    7600            0 :                 goto fail;
    7601          126 :               other_store_stmt = use_stmt;
    7602            0 :             }
    7603          126 :           if (other_store_stmt == NULL)
    7604            0 :             goto fail;
    7605          126 :           rhs = gimple_assign_lhs (other_store_stmt);
    7606          126 :           if (!single_imm_use (rhs, &use_p, &other_store_stmt))
    7607            0 :             goto fail;
    7608              :         }
    7609              :     }
    7610          260 :   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
    7611              :     {
    7612          260 :       use_operand_p use_p;
    7613          260 :       imm_use_iterator iter;
    7614         1040 :       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
    7615              :         {
    7616          520 :           gimple *use_stmt = USE_STMT (use_p);
    7617          520 :           if (use_stmt == stmt || is_gimple_debug (use_stmt))
    7618          260 :             continue;
    7619          260 :           if (other_store_stmt)
    7620            0 :             goto fail;
    7621          260 :           other_store_stmt = use_stmt;
    7622          260 :         }
    7623              :     }
    7624              :   else
    7625            0 :     goto fail;
    7626              : 
    7627          512 :   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
    7628          512 :   if (gimple_bb (def_stmt) != gimple_bb (stmt)
    7629          512 :       || !is_gimple_assign (def_stmt)
    7630         1024 :       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
    7631            0 :     goto fail;
    7632              : 
    7633          512 :   enum tree_code code = gimple_assign_rhs_code (def_stmt);
    7634              :   /* For pointer addition, we should use the normal plus for the vector
    7635              :      operation.  */
    7636          512 :   switch (code)
    7637              :     {
    7638            0 :     case POINTER_PLUS_EXPR:
    7639            0 :       code = PLUS_EXPR;
    7640            0 :       break;
    7641            0 :     case MULT_HIGHPART_EXPR:
    7642            0 :       goto fail;
    7643              :     default:
    7644              :       break;
    7645              :     }
    7646          512 :   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
    7647            0 :     goto fail;
    7648              : 
    7649          512 :   tree rhs1 = gimple_assign_rhs1 (def_stmt);
    7650          512 :   tree rhs2 = gimple_assign_rhs2 (def_stmt);
    7651          512 :   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
    7652            0 :     goto fail;
    7653              : 
    7654          512 :   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
    7655          512 :   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
    7656          512 :   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
    7657          512 :       || !gimple_assign_load_p (load1_stmt)
    7658          512 :       || gimple_bb (load2_stmt) != gimple_bb (stmt)
    7659         1024 :       || !gimple_assign_load_p (load2_stmt))
    7660            0 :     goto fail;
    7661              : 
    7662          512 :   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
    7663          512 :   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
    7664          512 :   if (load1_stmt_info == NULL
    7665          512 :       || load2_stmt_info == NULL
    7666          512 :       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
    7667          512 :           != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
    7668          512 :       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
    7669          512 :           != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    7670            0 :     goto fail;
    7671              : 
    7672          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
    7673              :     {
    7674          126 :       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
    7675          126 :       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
    7676          126 :           || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
    7677            0 :         goto fail;
    7678          126 :       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
    7679          126 :       tree lrhs;
    7680          126 :       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    7681              :         lrhs = rhs1;
    7682              :       else
    7683           16 :         lrhs = rhs2;
    7684          126 :       use_operand_p use_p;
    7685          126 :       imm_use_iterator iter;
    7686          504 :       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
    7687              :         {
    7688          252 :           gimple *use_stmt = USE_STMT (use_p);
    7689          252 :           if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
    7690          126 :             continue;
    7691          126 :           if (other_store_stmt)
    7692            0 :             goto fail;
    7693          126 :           other_store_stmt = use_stmt;
    7694          126 :         }
    7695              :     }
    7696              : 
    7697          512 :   if (other_store_stmt == NULL)
    7698            0 :     goto fail;
    7699          512 :   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
    7700          512 :       || !gimple_store_p (other_store_stmt))
    7701            0 :     goto fail;
    7702              : 
    7703          512 :   stmt_vec_info other_store_stmt_info
    7704          512 :     = loop_vinfo->lookup_stmt (other_store_stmt);
    7705          512 :   if (other_store_stmt_info == NULL
    7706          512 :       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
    7707          512 :           != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    7708            0 :     goto fail;
    7709              : 
    7710          512 :   gimple *stmt1 = stmt;
    7711          512 :   gimple *stmt2 = other_store_stmt;
    7712          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    7713              :     std::swap (stmt1, stmt2);
    7714          512 :   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
    7715              :                             gimple_assign_rhs1 (load2_stmt)))
    7716              :     {
    7717          162 :       std::swap (rhs1, rhs2);
    7718          162 :       std::swap (load1_stmt, load2_stmt);
    7719          162 :       std::swap (load1_stmt_info, load2_stmt_info);
    7720              :     }
    7721          512 :   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
    7722              :                              gimple_assign_rhs1 (load1_stmt)))
    7723            0 :     goto fail;
    7724              : 
    7725          512 :   tree var3 = NULL_TREE;
    7726          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
    7727          512 :       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
    7728              :                                 gimple_assign_rhs1 (load2_stmt)))
    7729            0 :     goto fail;
    7730          512 :   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    7731              :     {
    7732          252 :       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
    7733          252 :       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
    7734          252 :           || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
    7735            0 :         goto fail;
    7736          252 :       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
    7737          252 :       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
    7738          252 :           || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
    7739          504 :           || lookup_attribute ("omp simd inscan exclusive",
    7740          252 :                                DECL_ATTRIBUTES (var3)))
    7741            0 :         goto fail;
    7742              :     }
    7743              : 
    7744          512 :   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
    7745          512 :   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
    7746          512 :       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
    7747            0 :     goto fail;
    7748              : 
    7749          512 :   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7750          512 :   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
    7751          512 :   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
    7752          512 :       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
    7753         1024 :       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    7754          512 :          == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
    7755            0 :     goto fail;
    7756              : 
    7757          512 :   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    7758          256 :     std::swap (var1, var2);
    7759              : 
    7760          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    7761              :     {
    7762          252 :       if (!lookup_attribute ("omp simd inscan exclusive",
    7763          252 :                              DECL_ATTRIBUTES (var1)))
    7764            0 :         goto fail;
    7765          252 :       var1 = var3;
    7766              :     }
    7767              : 
    7768          512 :   if (loop_vinfo->scan_map == NULL)
    7769            0 :     goto fail;
    7770          512 :   tree *init = loop_vinfo->scan_map->get (var1);
    7771          512 :   if (init == NULL)
    7772            0 :     goto fail;
    7773              : 
    7774              :   /* The IL is as expected, now check if we can actually vectorize it.
    7775              :      Inclusive scan:
    7776              :        _26 = D.2043[_25];
    7777              :        _27 = D.2042[_25];
    7778              :        _28 = _26 + _27;
    7779              :        D.2043[_25] = _28;
    7780              :        D.2042[_25] = _28;
    7781              :      should be vectorized as (where _40 is the vectorized rhs
    7782              :      from the D.2042[_21] = 0; store):
    7783              :        _30 = MEM <vector(8) int> [(int *)&D.2043];
    7784              :        _31 = MEM <vector(8) int> [(int *)&D.2042];
    7785              :        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
    7786              :        _33 = _31 + _32;
    7787              :        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
    7788              :        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
    7789              :        _35 = _33 + _34;
    7790              :        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
    7791              :        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
    7792              :        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
    7793              :        _37 = _35 + _36;
    7794              :        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
    7795              :        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
    7796              :        _38 = _30 + _37;
    7797              :        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
    7798              :        MEM <vector(8) int> [(int *)&D.2043] = _39;
    7799              :        MEM <vector(8) int> [(int *)&D.2042] = _38;
    7800              :      Exclusive scan:
    7801              :        _26 = D.2043[_25];
    7802              :        D.2044[_25] = _26;
    7803              :        _27 = D.2042[_25];
    7804              :        _28 = _26 + _27;
    7805              :        D.2043[_25] = _28;
    7806              :      should be vectorized as (where _40 is the vectorized rhs
    7807              :      from the D.2042[_21] = 0; store):
    7808              :        _30 = MEM <vector(8) int> [(int *)&D.2043];
    7809              :        _31 = MEM <vector(8) int> [(int *)&D.2042];
    7810              :        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
    7811              :        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
    7812              :        _34 = _32 + _33;
    7813              :        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
    7814              :        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
    7815              :        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
    7816              :        _36 = _34 + _35;
    7817              :        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
    7818              :        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
    7819              :        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
    7820              :        _38 = _36 + _37;
    7821              :        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
    7822              :        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
    7823              :        _39 = _30 + _38;
    7824              :        _50 = _31 + _39;
    7825              :        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
    7826              :        MEM <vector(8) int> [(int *)&D.2044] = _39;
    7827              :        MEM <vector(8) int> [(int *)&D.2043] = _51;  */
    7828          512 :   enum machine_mode vec_mode = TYPE_MODE (vectype);
    7829          512 :   optab optab = optab_for_tree_code (code, vectype, optab_default);
    7830          512 :   if (!optab || !can_implement_p (optab, vec_mode))
    7831            0 :     goto fail;
    7832              : 
    7833          512 :   int units_log2 = scan_store_can_perm_p (vectype, *init);
    7834          512 :   if (units_log2 == -1)
    7835            0 :     goto fail;
    7836              : 
    7837              :   return true;
    7838              : }
    7839              : 
    7840              : 
    7841              : /* Function vectorizable_scan_store.
    7842              : 
    7843              :    Helper of vectorizable_store; the arguments have the same meaning as
    7844              :    the like-named arguments of vectorizable_store.
                           Handle only the transformation, checking is done in check_scan_store.

                           VINFO is cast to a loop_vec_info and used to look up statement
                           info and the scan_map initializer.
                           STMT_INFO is the scan store statement being transformed.
                           SLP_NODE supplies the vector type and the vectorized operands.
                           GSI is handed to vect_finish_stmt_generation for every statement
                           emitted here.

                           Always returns true; conditions that check_scan_store is expected
                           to have verified are only asserted.  */
    7845              : 
    7846              : static bool
    7847          512 : vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
    7848              :                          slp_tree slp_node, gimple_stmt_iterator *gsi)
    7849              : {
    7850          512 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    7851          512 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    7852          512 :   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
    7853          512 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
    7854              : 
    7855          512 :   if (dump_enabled_p ())
    7856          492 :     dump_printf_loc (MSG_NOTE, vect_location,
    7857              :                      "transform scan store.\n");
    7858              : 
    7859          512 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7860          512 :   tree rhs = gimple_assign_rhs1 (stmt);
    7861          512 :   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
    7862              : 
                          /* VAR is the variable whose address is the base of the store;
                             INSCAN_VAR_STORE records whether it carries the
                             "omp simd inscan" attribute.  */
    7863          512 :   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7864          512 :   bool inscan_var_store
    7865          512 :     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
    7866              : 
                          /* Lane-access kind 4 is the exclusive scan (see the comments on
                             the kind 4 checks further down).  When this store is not the one
                             to the inscan variable, replace RHS by the lhs of its first
                             non-debug use other than STMT, so that DEF_STMT below is the
                             statement defining that value.  */
    7867          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    7868              :     {
    7869          126 :       use_operand_p use_p;
    7870          126 :       imm_use_iterator iter;
    7871          252 :       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
    7872              :         {
    7873          126 :           gimple *use_stmt = USE_STMT (use_p);
    7874          126 :           if (use_stmt == stmt || is_gimple_debug (use_stmt))
    7875            0 :             continue;
    7876          126 :           rhs = gimple_assign_lhs (use_stmt);
    7877          126 :           break;
    7878          126 :         }
    7879              :     }
    7880              : 
                          /* DEF_STMT is the binary, commutative combiner statement.  A
                             POINTER_PLUS_EXPR combiner is performed as PLUS_EXPR on the
                             vectors.  */
    7881          512 :   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
    7882          512 :   enum tree_code code = gimple_assign_rhs_code (def_stmt);
    7883          512 :   if (code == POINTER_PLUS_EXPR)
    7884            0 :     code = PLUS_EXPR;
    7885          512 :   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
    7886              :               && commutative_tree_code (code));
    7887          512 :   tree rhs1 = gimple_assign_rhs1 (def_stmt);
    7888          512 :   tree rhs2 = gimple_assign_rhs2 (def_stmt);
    7889          512 :   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
                          /* Both operands of the combiner are treated as loads; fetch their
                             data references and the variables they are based on.  */
    7890          512 :   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
    7891          512 :   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
    7892          512 :   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
    7893          512 :   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
    7894          512 :   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
    7895          512 :   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
    7896          512 :   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
    7897          512 :   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
    7898              : 
                          /* If the first load reads the "omp simd inscan" variable, exchange
                             the operands so that rhs1/var1/load1_dr_info describe the other
                             load.  VAR1 is the key looked up in scan_map just below.  */
    7899          512 :   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    7900              :     {
    7901          436 :       std::swap (rhs1, rhs2);
    7902          436 :       std::swap (var1, var2);
    7903          436 :       std::swap (load1_dr_info, load2_dr_info);
    7904              :     }
    7905              : 
    7906          512 :   tree *init = loop_vinfo->scan_map->get (var1);
    7907          512 :   gcc_assert (init);
    7908              : 
                          /* The transform requires a compile-time constant element count.  */
    7909          512 :   unsigned HOST_WIDE_INT nunits;
    7910          512 :   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    7911              :     gcc_unreachable ();
                          /* USE_WHOLE_VECTOR is filled in by scan_store_can_perm_p; when it
                             is non-empty its entry i gives the scan_store_kind of step i.  */
    7912          512 :   auto_vec<enum scan_store_kind, 16> use_whole_vector;
    7913          512 :   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
    7914          512 :   gcc_assert (units_log2 > 0);
                          /* Build units_log2 + 1 permutation masks.  For i < units_log2,
                             perms[i] takes elements 0 .. 2^i - 1 of the first input followed
                             by the leading elements of the second input (indices nunits,
                             nunits + 1, ...).  perms[units_log2] selects element nunits - 1
                             for every lane, i.e. broadcasts the last element.  */
    7915          512 :   auto_vec<tree, 16> perms;
    7916          512 :   perms.quick_grow (units_log2 + 1);
    7917          512 :   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
    7918         2392 :   for (int i = 0; i <= units_log2; ++i)
    7919              :     {
    7920         1880 :       unsigned HOST_WIDE_INT j, k;
    7921         1880 :       vec_perm_builder sel (nunits, nunits, 1);
    7922         1880 :       sel.quick_grow (nunits);
    7923         1880 :       if (i == units_log2)
    7924         4864 :         for (j = 0; j < nunits; ++j)
    7925         4352 :           sel[j] = nunits - 1;
    7926              :       else
    7927              :         {
    7928         5208 :           for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
    7929         3840 :             sel[j] = j;
    7930        13208 :           for (k = 0; j < nunits; ++j, ++k)
    7931        11840 :             sel[j] = nunits + k;
    7932              :         }
                              /* The broadcast mask has a single input vector, the others
                                 have two.  */
    7933         3248 :       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
                              /* For a step that is not a plain permutation, lazily create the
                                 zero vector (and the mask type for the lshift_cond kind) and
                                 build the mask with the "any" rather than the "checked"
                                 generator.  */
    7934         1880 :       if (!use_whole_vector.is_empty ()
    7935            0 :           && use_whole_vector[i] != scan_store_kind_perm)
    7936              :         {
    7937            0 :           if (zero_vec == NULL_TREE)
    7938            0 :             zero_vec = build_zero_cst (vectype);
    7939            0 :           if (masktype == NULL_TREE
    7940            0 :               && use_whole_vector[i] == scan_store_kind_lshift_cond)
    7941            0 :             masktype = truth_type_for (vectype);
    7942            0 :           perms[i] = vect_gen_perm_mask_any (vectype, indices);
    7943              :         }
    7944              :       else
    7945         1880 :         perms[i] = vect_gen_perm_mask_checked (vectype, indices);
    7946         1880 :     }
    7947              : 
                          /* Length-based loop control data, if in use; it is only passed on
                             to vect_get_data_ptr_increment below.  */
    7948          512 :   vec_loop_lens *loop_lens
    7949          512 :     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
    7950              :        ? &LOOP_VINFO_LENS (loop_vinfo)
    7951            0 :        : NULL);
    7952              : 
    7953          512 :   tree vec_oprnd1 = NULL_TREE;
    7954          512 :   tree vec_oprnd2 = NULL_TREE;
    7955          512 :   tree vec_oprnd3 = NULL_TREE;
                          /* Stores are emitted as MEM_REFs of the base address plus a
                             constant offset that starts at zero and advances by BUMP.  */
    7956          512 :   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
    7957          512 :   tree dataref_offset = build_int_cst (ref_type, 0);
    7958          512 :   tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
    7959              :                                            vectype, VMAT_CONTIGUOUS,
    7960              :                                            loop_lens);
    7961          512 :   tree ldataref_ptr = NULL_TREE;
    7962          512 :   tree orig = NULL_TREE;
                          /* In the exclusive-scan case where this is not the store to the
                             inscan variable, the second operand is loaded explicitly from
                             load1's base address inside the main loop below.  */
    7963          512 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    7964          126 :     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
    7965              :   /* The initialization is invariant.  */
    7966          512 :   vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
    7967          512 :   auto_vec<tree> vec_oprnds2;
    7968          512 :   auto_vec<tree> vec_oprnds3;
    7969          512 :   if (ldataref_ptr == NULL)
    7970              :     {
    7971              :       /* We want to lookup the vector operands of the reduction, not those
    7972              :          of the store - for SLP we have to use the proper SLP node for the
    7973              :          lookup, which should be the single child of the scan store.  */
    7974          386 :       vect_get_vec_defs (vinfo, SLP_TREE_CHILDREN (slp_node)[0],
    7975              :                          rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
    7976              :       /* ???  For SLP we do not key the def on 'rhs1' or 'rhs2' but get
    7977              :          them in SLP child order.  So we have to swap here with logic
    7978              :          similar to above.  */
    7979          386 :       stmt_vec_info load
    7980          386 :         = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
    7981          386 :                                    (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
                              /* Note these two declarations shadow the function-level DR_INFO
                                 and VAR for the remainder of this block only.  */
    7982          386 :       dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
    7983          386 :       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
    7984          386 :       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
    7985          820 :         for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
    7986          494 :           std::swap (vec_oprnds2[i], vec_oprnds3[i]);;
    7987              :     }
    7988              :   else
                            /* Only the third operand comes from vectorized defs here; the
                               second one is loaded by hand in the loop below.  */
    7989          126 :     vect_get_vec_defs (vinfo, slp_node,
    7990              :                        rhs2, &vec_oprnds3);
                          /* Emit one scan sequence per vector def of the third operand.  */
    7991         1248 :   for (unsigned j = 0; j < vec_oprnds3.length (); j++)
    7992              :     {
    7993          736 :       if (ldataref_ptr == NULL)
    7994          554 :         vec_oprnd2 = vec_oprnds2[j];
    7995          736 :       vec_oprnd3 = vec_oprnds3[j];
                              /* ORIG starts as the first third-operand vector and is replaced
                                 at the end of every iteration by the broadcast of the last
                                 lane, so it carries the running value into the next vector.
                                 When the stores are emitted inside this loop the offset
                                 advances by BUMP for every vector after the first.  */
    7996          736 :       if (j == 0)
    7997              :         orig = vec_oprnd3;
    7998          224 :       else if (!inscan_var_store)
    7999          112 :         dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
    8000              : 
    8001          736 :       if (ldataref_ptr)
    8002              :         {
    8003          182 :           vec_oprnd2 = make_ssa_name (vectype);
    8004          182 :           tree data_ref = fold_build2 (MEM_REF, vectype,
    8005              :                                        unshare_expr (ldataref_ptr),
    8006              :                                        dataref_offset);
    8007          182 :           vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
    8008          182 :           gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
    8009          182 :           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8010              :         }
    8011              : 
                              /* Prefix computation in units_log2 steps: permute V against
                                 the init vector (or the zero vector for whole-vector shift
                                 steps) with perms[i], then combine the result with V using
                                 CODE.  */
    8012          736 :       tree v = vec_oprnd2;
    8013         3068 :       for (int i = 0; i < units_log2; ++i)
    8014              :         {
    8015         2332 :           tree new_temp = make_ssa_name (vectype);
    8016         2332 :           gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
    8017              :                                            (zero_vec
    8018            0 :                                             && (use_whole_vector[i]
    8019            0 :                                                 != scan_store_kind_perm))
    8020              :                                            ? zero_vec : vec_oprnd1, v,
    8021         2332 :                                            perms[i]);
    8022         2332 :           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8023              : 
    8024         2332 :           if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
    8025              :             {
    8026              :               /* Whole vector shift shifted in zero bits, but if *init
    8027              :                  is not initializer_zerop, we need to replace those elements
    8028              :                  with elements from vec_oprnd1.  */
    8029            0 :               tree_vector_builder vb (masktype, nunits, 1);
    8030            0 :               for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
    8031            0 :                 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
    8032              :                                ? boolean_false_node : boolean_true_node);
    8033              : 
    8034            0 :               tree new_temp2 = make_ssa_name (vectype);
    8035            0 :               g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
    8036              :                                        new_temp, vec_oprnd1);
    8037            0 :               vect_finish_stmt_generation (vinfo, stmt_info,
    8038              :                                                            g, gsi);
    8039            0 :               new_temp = new_temp2;
    8040            0 :             }
    8041              : 
    8042              :           /* For exclusive scan, perform the perms[i] permutation once
    8043              :              more.  */
                                  /* Decrementing I before CONTINUE re-runs step 0 on the
                                     permuted vector.  V then no longer equals VEC_OPRND2, so
                                     this branch is taken at most once per outer iteration.  */
    8044         2332 :           if (i == 0
    8045         1100 :               && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
    8046          728 :               && v == vec_oprnd2)
    8047              :             {
    8048          364 :               v = new_temp;
    8049          364 :               --i;
    8050          364 :               continue;
    8051              :             }
    8052              : 
    8053         1968 :           tree new_temp2 = make_ssa_name (vectype);
    8054         1968 :           g = gimple_build_assign (new_temp2, code, v, new_temp);
    8055         1968 :           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8056              : 
    8057         1968 :           v = new_temp2;
    8058              :         }
    8059              : 
                              /* Combine the carried-in value ORIG with the prefix result.  */
    8060          736 :       tree new_temp = make_ssa_name (vectype);
    8061          736 :       gimple *g = gimple_build_assign (new_temp, code, orig, v);
    8062          736 :       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8063              : 
    8064          736 :       tree last_perm_arg = new_temp;
    8065              :       /* For exclusive scan, new_temp computed above is the exclusive scan
    8066              :          prefix sum.  Turn it into inclusive prefix sum for the broadcast
    8067              :          of the last element into orig.  */
    8068          736 :       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    8069              :         {
    8070          364 :           last_perm_arg = make_ssa_name (vectype);
    8071          364 :           g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
    8072          364 :           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8073              :         }
    8074              : 
                              /* Broadcast the last lane into the new ORIG.  */
    8075          736 :       orig = make_ssa_name (vectype);
    8076         2208 :       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
    8077          736 :                                last_perm_arg, perms[units_log2]);
    8078          736 :       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8079              : 
                              /* A store that is not to the inscan variable writes this
                                 iteration's NEW_TEMP right away.  */
    8080          736 :       if (!inscan_var_store)
    8081              :         {
    8082          368 :           tree data_ref = fold_build2 (MEM_REF, vectype,
    8083              :                                        unshare_expr (dataref_ptr),
    8084              :                                        dataref_offset);
    8085          368 :           vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
    8086          368 :           g = gimple_build_assign (data_ref, new_temp);
    8087          368 :           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8088              :         }
    8089              :     }
    8090              : 
                          /* A store to the inscan variable is emitted only after the loop:
                             every vector-sized slot receives the final ORIG, i.e. the
                             broadcast produced by the last iteration above.  */
    8091          512 :   if (inscan_var_store)
    8092          624 :     for (unsigned j = 0; j < vec_oprnds3.length (); j++)
    8093              :       {
    8094          368 :         if (j != 0)
    8095          112 :           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
    8096              : 
    8097          368 :         tree data_ref = fold_build2 (MEM_REF, vectype,
    8098              :                                      unshare_expr (dataref_ptr),
    8099              :                                      dataref_offset);
    8100          368 :         vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
    8101          368 :         gimple *g = gimple_build_assign (data_ref, orig);
    8102          368 :         vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
    8103              :       }
    8104          512 :   return true;
    8105          512 : }
    8106              : 
    8107              : 
    8108              : /* Function vectorizable_store.
    8109              : 
    8110              :    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
    8111              :    that can be vectorized.
    8112              :    If COST_VEC is passed, calculate costs but don't change anything,
    8113              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    8114              :    it, and insert it at GSI.
    8115              :    Return true if STMT_INFO is vectorizable in this way.  */
    8116              : 
    8117              : static bool
    8118      2077735 : vectorizable_store (vec_info *vinfo,
    8119              :                     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    8120              :                     slp_tree slp_node,
    8121              :                     stmt_vector_for_cost *cost_vec)
    8122              : {
    8123      2077735 :   tree data_ref;
    8124      2077735 :   tree vec_oprnd = NULL_TREE;
    8125      2077735 :   tree elem_type;
    8126      2077735 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    8127      2077735 :   class loop *loop = NULL;
    8128      2077735 :   machine_mode vec_mode;
    8129      2077735 :   tree dummy;
    8130      2077735 :   enum vect_def_type rhs_dt = vect_unknown_def_type;
    8131      2077735 :   enum vect_def_type mask_dt = vect_unknown_def_type;
    8132      2077735 :   tree dataref_ptr = NULL_TREE;
    8133      2077735 :   tree dataref_offset = NULL_TREE;
    8134      2077735 :   gimple *ptr_incr = NULL;
    8135      2077735 :   int j;
    8136      2077735 :   stmt_vec_info first_stmt_info;
    8137      2077735 :   bool grouped_store;
    8138      2077735 :   unsigned int group_size, i;
    8139      2077735 :   unsigned int vec_num;
    8140      2077735 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    8141      2077735 :   tree aggr_type;
    8142      2077735 :   poly_uint64 vf;
    8143      2077735 :   vec_load_store_type vls_type;
    8144      2077735 :   tree ref_type;
    8145              : 
    8146      2077735 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    8147              :     return false;
    8148              : 
    8149      2077735 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    8150       234683 :       && cost_vec)
    8151              :     return false;
    8152              : 
    8153              :   /* Is vectorizable store? */
    8154              : 
    8155      1843052 :   tree mask_vectype = NULL_TREE;
    8156      1843052 :   slp_tree mask_node = NULL;
    8157      1843052 :   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    8158              :     {
    8159      1771827 :       tree scalar_dest = gimple_assign_lhs (assign);
    8160      1771827 :       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
    8161      1771827 :           && is_pattern_stmt_p (stmt_info))
    8162         1672 :         scalar_dest = TREE_OPERAND (scalar_dest, 0);
    8163      1771827 :       if (TREE_CODE (scalar_dest) != ARRAY_REF
    8164      1771827 :           && TREE_CODE (scalar_dest) != BIT_FIELD_REF
    8165              :           && TREE_CODE (scalar_dest) != INDIRECT_REF
    8166              :           && TREE_CODE (scalar_dest) != COMPONENT_REF
    8167              :           && TREE_CODE (scalar_dest) != IMAGPART_EXPR
    8168              :           && TREE_CODE (scalar_dest) != REALPART_EXPR
    8169              :           && TREE_CODE (scalar_dest) != MEM_REF)
    8170              :         return false;
    8171              :     }
    8172              :   else
    8173              :     {
    8174       729107 :       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    8175        12445 :       if (!call || !gimple_call_internal_p (call))
    8176              :         return false;
    8177              : 
    8178         8326 :       internal_fn ifn = gimple_call_internal_fn (call);
    8179         8326 :       if (!internal_store_fn_p (ifn))
    8180              :         return false;
    8181              : 
    8182         1899 :       int mask_index = internal_fn_mask_index (ifn);
    8183         1899 :       if (mask_index >= 0)
    8184         1899 :         mask_index = vect_slp_child_index_for_operand (stmt_info, mask_index);
    8185         1899 :       if (mask_index >= 0
    8186         1899 :           && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
    8187              :                                       &mask_node, &mask_dt,
    8188              :                                       &mask_vectype))
    8189              :         return false;
    8190              :     }
    8191              : 
    8192      1361105 :   tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE;
    8193      1361105 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    8194              : 
    8195      1361105 :   if (loop_vinfo)
    8196              :     {
    8197       226694 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    8198       226694 :       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    8199              :     }
    8200              :   else
    8201              :     vf = 1;
    8202      1361105 :   vec_num = vect_get_num_copies (vinfo, slp_node);
    8203              : 
    8204              :   /* FORNOW.  This restriction should be relaxed.  */
    8205      1361105 :   if (loop
    8206      1361380 :       && nested_in_vect_loop_p (loop, stmt_info)
    8207      1361388 :       && vec_num > 1)
    8208              :     {
    8209            8 :       if (dump_enabled_p ())
    8210            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8211              :                          "multiple types in nested loop.\n");
    8212            8 :       return false;
    8213              :     }
    8214              : 
    8215      1361097 :   slp_tree op_node;
    8216      1361097 :   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
    8217              :                              &op_node, &rhs_dt, &rhs_vectype, &vls_type))
    8218              :     return false;
    8219              : 
    8220      1361073 :   elem_type = TREE_TYPE (vectype);
    8221      1361073 :   vec_mode = TYPE_MODE (vectype);
    8222              : 
    8223      1361073 :   if (!STMT_VINFO_DATA_REF (stmt_info))
    8224              :     return false;
    8225              : 
    8226      1361073 :   vect_load_store_data _ls_data{};
    8227      1361073 :   vect_load_store_data &ls = slp_node->get_data (_ls_data);
    8228      1361073 :   if (cost_vec
    8229      1361073 :       && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
    8230              :                                vls_type, &_ls_data))
    8231              :     return false;
    8232              :   /* Temporary aliases to analysis data, should not be modified through
    8233              :      these.  */
    8234      1360457 :   const vect_memory_access_type memory_access_type = ls.memory_access_type;
    8235      1360457 :   const dr_alignment_support alignment_support_scheme
    8236              :     = ls.alignment_support_scheme;
    8237      1360457 :   const int misalignment = ls.misalignment;
    8238      1360457 :   const poly_int64 poffset = ls.poffset;
    8239              : 
    8240      1360457 :   if (slp_node->ldst_lanes
    8241            0 :       && memory_access_type != VMAT_LOAD_STORE_LANES)
    8242              :     {
    8243            0 :       if (dump_enabled_p ())
    8244            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8245              :                          "discovered store-lane but cannot use it.\n");
    8246            0 :       return false;
    8247              :     }
    8248              : 
    8249      1360457 :   if (mask_node)
    8250              :     {
    8251         1809 :       if (memory_access_type == VMAT_CONTIGUOUS)
    8252              :         {
    8253          616 :           if (!VECTOR_MODE_P (vec_mode)
    8254         3086 :               || !can_vec_mask_load_store_p (vec_mode,
    8255         1543 :                                              TYPE_MODE (mask_vectype), false))
    8256          114 :             return false;
    8257              :         }
    8258          266 :       else if (memory_access_type != VMAT_LOAD_STORE_LANES
    8259          266 :                && (!mat_gather_scatter_p (memory_access_type)
    8260          242 :                    || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY
    8261          170 :                        && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
    8262              :         {
    8263           24 :           if (dump_enabled_p ())
    8264           24 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8265              :                              "unsupported access type for masked store.\n");
    8266           24 :           return false;
    8267              :         }
    8268          242 :       else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
    8269              :         {
    8270           72 :           if (dump_enabled_p ())
    8271           24 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8272              :                              "unsupported masked emulated scatter.\n");
    8273           72 :           return false;
    8274              :         }
    8275              :     }
    8276              :   else
    8277              :     {
    8278              :       /* FORNOW.  In some cases we could vectorize even if the data type is
    8279              :          not supported (e.g. array initialization with 0).  */
    8280      1358648 :       if (!can_implement_p (mov_optab, vec_mode))
    8281              :         return false;
    8282              :     }
    8283              : 
    8284      1360247 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
    8285      1360247 :   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
    8286      2514575 :                    && !mat_gather_scatter_p (memory_access_type));
    8287      1154328 :   if (grouped_store)
    8288              :     {
    8289      1154328 :       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    8290      1154328 :       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
    8291      1154328 :       group_size = DR_GROUP_SIZE (first_stmt_info);
    8292              :     }
    8293              :   else
    8294              :     {
    8295      1360247 :       first_stmt_info = stmt_info;
    8296      1360247 :       first_dr_info = dr_info;
    8297              :       group_size = 1;
    8298              :     }
    8299              : 
    8300      1360247 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
    8301              :     {
    8302         1076 :       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node,
    8303              :                              mask_node, memory_access_type))
    8304              :         return false;
    8305              :     }
    8306              : 
    8307      2719726 :   bool costing_p = cost_vec;
    8308      1359479 :   if (costing_p) /* transformation not required.  */
    8309              :     {
    8310       814525 :       if (loop_vinfo
    8311       162742 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
    8312        76534 :         check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
    8313              :                                               vls_type, group_size, &ls,
    8314              :                                               mask_node);
    8315              : 
    8316       814525 :       if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
    8317       814525 :           || (mask_node
    8318         1055 :               && !vect_maybe_update_slp_op_vectype (mask_node,
    8319              :                                                     mask_vectype)))
    8320              :         {
    8321            0 :           if (dump_enabled_p ())
    8322            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8323              :                              "incompatible vector types for invariants\n");
    8324            0 :           return false;
    8325              :         }
    8326              : 
    8327       814525 :       if (dump_enabled_p ()
    8328              :           && memory_access_type != VMAT_ELEMENTWISE
    8329        15095 :           && memory_access_type != VMAT_STRIDED_SLP
    8330        14419 :           && memory_access_type != VMAT_INVARIANT
    8331       828944 :           && alignment_support_scheme != dr_aligned)
    8332         4997 :         dump_printf_loc (MSG_NOTE, vect_location,
    8333              :                          "Vectorizing an unaligned access.\n");
    8334              :     }
    8335              : 
    8336              :   /* Transform.  */
    8337              : 
    8338      1360247 :   ensure_base_align (dr_info);
    8339              : 
    8340      1360247 :   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
    8341              :     {
    8342         1024 :       gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
    8343         1024 :       gcc_assert (SLP_TREE_LANES (slp_node) == 1);
    8344         1024 :       if (costing_p)
    8345              :         {
    8346          512 :           unsigned int inside_cost = 0, prologue_cost = 0;
    8347          512 :           if (vls_type == VLS_STORE_INVARIANT)
    8348            0 :             prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
    8349              :                                                slp_node, 0, vect_prologue);
    8350          512 :           vect_get_store_cost (vinfo, stmt_info, slp_node, 1,
    8351              :                                alignment_support_scheme, misalignment,
    8352              :                                &inside_cost, cost_vec);
    8353              : 
    8354          512 :           if (dump_enabled_p ())
    8355          492 :             dump_printf_loc (MSG_NOTE, vect_location,
    8356              :                              "vect_model_store_cost: inside_cost = %d, "
    8357              :                              "prologue_cost = %d .\n",
    8358              :                              inside_cost, prologue_cost);
    8359              : 
    8360          512 :           SLP_TREE_TYPE (slp_node) = store_vec_info_type;
    8361          512 :           slp_node->data = new vect_load_store_data (std::move (ls));
    8362              : 
    8363          512 :           return true;
    8364              :         }
    8365          512 :       return vectorizable_scan_store (vinfo, stmt_info, slp_node, gsi);
    8366              :     }
    8367              : 
    8368              :   /* FORNOW */
    8369      1359223 :   gcc_assert (!grouped_store
    8370              :               || !loop
    8371              :               || !nested_in_vect_loop_p (loop, stmt_info));
    8372              : 
    8373      1359223 :   grouped_store = false;
    8374      1359223 :   first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
    8375      1359223 :   gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
    8376              :               || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
    8377      1359223 :   first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
    8378              : 
    8379      1359223 :   ref_type = get_group_alias_ptr_type (first_stmt_info);
    8380              : 
    8381      1359223 :   if (!costing_p && dump_enabled_p ())
    8382        12261 :     dump_printf_loc (MSG_NOTE, vect_location, "transform store.\n");
    8383              : 
    8384      1359223 :   if (memory_access_type == VMAT_ELEMENTWISE
    8385      1359223 :       || memory_access_type == VMAT_STRIDED_SLP)
    8386              :     {
    8387        29195 :       unsigned inside_cost = 0, prologue_cost = 0;
    8388        29195 :       gimple_stmt_iterator incr_gsi;
    8389        29195 :       bool insert_after;
    8390        29195 :       tree offvar = NULL_TREE;
    8391        29195 :       tree ivstep;
    8392        29195 :       tree running_off;
    8393        29195 :       tree stride_base, stride_step, alias_off;
    8394        29195 :       tree vec_oprnd = NULL_TREE;
    8395        29195 :       tree dr_offset;
    8396              :       /* Checked by get_load_store_type.  */
    8397        29195 :       unsigned int const_nunits = nunits.to_constant ();
    8398              : 
    8399        29195 :       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
    8400        29195 :       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
    8401              : 
    8402        29195 :       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
    8403        29195 :       stride_base
    8404        29195 :         = fold_build_pointer_plus
    8405              :             (DR_BASE_ADDRESS (first_dr_info->dr),
    8406              :              size_binop (PLUS_EXPR,
    8407              :                          convert_to_ptrofftype (dr_offset),
    8408              :                          convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
    8409        29195 :       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
    8410              : 
    8411              :       /* For a store with loop-invariant (but other than power-of-2)
    8412              :          stride (i.e. not a grouped access) like so:
    8413              : 
    8414              :            for (i = 0; i < n; i += stride)
    8415              :              array[i] = ...;
    8416              : 
    8417              :          we generate a new induction variable and new stores from
    8418              :          the components of the (vectorized) rhs:
    8419              : 
    8420              :            for (j = 0; ; j += VF*stride)
    8421              :              vectemp = ...;
    8422              :              tmp1 = vectemp[0];
    8423              :              array[j] = tmp1;
    8424              :              tmp2 = vectemp[1];
    8425              :              array[j + stride] = tmp2;
    8426              :              ...
    8427              :          */
    8428              : 
    8429              :       /* ???  Modify local copies of alignment_support_scheme and
    8430              :          misalignment, but this part of analysis should be done
    8431              :          earlier and remembered, likewise the chosen store mode.  */
    8432        29195 :       const dr_alignment_support tem = alignment_support_scheme;
    8433        29195 :       dr_alignment_support alignment_support_scheme = tem;
    8434        29195 :       const int tem2 = misalignment;
    8435        29195 :       int misalignment = tem2;
    8436              : 
    8437        29195 :       unsigned nstores = const_nunits;
    8438        29195 :       unsigned lnel = 1;
    8439        29195 :       tree ltype = elem_type;
    8440        29195 :       tree lvectype = vectype;
    8441        29195 :       HOST_WIDE_INT n = gcd (group_size, const_nunits);
    8442        29195 :       if (n == const_nunits)
    8443              :         {
    8444         2939 :           int mis_align = dr_misalignment (first_dr_info, vectype);
    8445              :           /* With VF > 1 we advance the DR by step, if that is constant
    8446              :              and only aligned when performed VF times, DR alignment
    8447              :              analysis can analyze this as aligned since it assumes
    8448              :              contiguous accesses.  But that is not how we code generate
    8449              :              here, so adjust for this.  */
    8450         2939 :           if (maybe_gt (vf, 1u)
    8451         4464 :               && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
    8452         4235 :                               DR_TARGET_ALIGNMENT (first_dr_info)))
    8453          229 :             mis_align = -1;
    8454         2939 :           dr_alignment_support dr_align
    8455         2939 :               = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
    8456              :                                                mis_align);
    8457         2939 :           if (dr_align == dr_aligned
    8458         2939 :               || dr_align == dr_unaligned_supported)
    8459              :             {
    8460        29195 :               nstores = 1;
    8461        29195 :               lnel = const_nunits;
    8462        29195 :               ltype = vectype;
    8463        29195 :               lvectype = vectype;
    8464        29195 :               alignment_support_scheme = dr_align;
    8465        29195 :               misalignment = mis_align;
    8466              :             }
    8467              :         }
    8468        26256 :       else if (n > 1)
    8469              :         {
    8470         1967 :           nstores = const_nunits / n;
    8471         1967 :           lnel = n;
    8472         1967 :           ltype = build_vector_type (elem_type, n);
    8473         1967 :           lvectype = vectype;
    8474         1967 :           int mis_align = dr_misalignment (first_dr_info, ltype);
    8475         1967 :           if (maybe_gt (vf, 1u)
    8476         3934 :               && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
    8477         3292 :                               DR_TARGET_ALIGNMENT (first_dr_info)))
    8478          642 :             mis_align = -1;
    8479         1967 :           dr_alignment_support dr_align
    8480         1967 :             = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
    8481              :                                              mis_align);
    8482         1967 :           alignment_support_scheme = dr_align;
    8483         1967 :           misalignment = mis_align;
    8484              : 
    8485              :           /* First check if vec_extract optab doesn't support extraction
    8486              :              of vector elts directly.  */
    8487         1967 :           scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
    8488         1967 :           machine_mode vmode;
    8489         3934 :           if (!VECTOR_MODE_P (TYPE_MODE (vectype))
    8490         2139 :               || !related_vector_mode (TYPE_MODE (vectype), elmode,
    8491         1967 :                                        n).exists (&vmode)
    8492         1773 :               || (convert_optab_handler (vec_extract_optab,
    8493         1773 :                                          TYPE_MODE (vectype), vmode)
    8494              :                   == CODE_FOR_nothing)
    8495         1967 :               || !(dr_align == dr_aligned
    8496          172 :                    || dr_align == dr_unaligned_supported))
    8497              :             {
    8498              :               /* Try to avoid emitting an extract of vector elements
    8499              :                  by performing the extracts using an integer type of the
    8500              :                  same size, extracting from a vector of those and then
    8501              :                  re-interpreting it as the original vector type if
    8502              :                  supported.  */
    8503         1795 :               unsigned lsize = n * GET_MODE_BITSIZE (elmode);
    8504         1795 :               unsigned int lnunits = const_nunits / n;
    8505              :               /* If we can't construct such a vector fall back to
    8506              :                  element extracts from the original vector type and
    8507              :                  element size stores.  */
    8508         1795 :               if (int_mode_for_size (lsize, 0).exists (&elmode)
    8509         1795 :                   && VECTOR_MODE_P (TYPE_MODE (vectype))
    8510         1795 :                   && related_vector_mode (TYPE_MODE (vectype), elmode,
    8511         1795 :                                           lnunits).exists (&vmode)
    8512         1767 :                   && (convert_optab_handler (vec_extract_optab,
    8513              :                                              vmode, elmode)
    8514              :                       != CODE_FOR_nothing))
    8515              :                 {
    8516         1767 :                   nstores = lnunits;
    8517         1767 :                   lnel = n;
    8518         1767 :                   ltype = build_nonstandard_integer_type (lsize, 1);
    8519         1767 :                   lvectype = build_vector_type (ltype, nstores);
    8520              :                 }
    8521              :               /* Else fall back to vector extraction anyway.
    8522              :                  Fewer stores are more important than avoiding spilling
    8523              :                  of the vector we extract from.  Compared to the
    8524              :                  construction case in vectorizable_load no store-forwarding
    8525              :                  issue exists here for reasonable archs.  But only
    8526              :                  if the store is supported.  */
    8527           28 :                  else if (!(dr_align == dr_aligned
    8528           28 :                             || dr_align == dr_unaligned_supported))
    8529              :                    {
    8530        29195 :                      nstores = const_nunits;
    8531        29195 :                      lnel = 1;
    8532        29195 :                      ltype = elem_type;
    8533        29195 :                      lvectype = vectype;
    8534              :                    }
    8535              :             }
    8536              :         }
    8537              : 
    8538        29195 :       if (costing_p)
    8539              :         {
    8540              :           /* Record the decomposition type for target access during costing.  */
    8541        25791 :           ls.ls_type = lvectype;
    8542        25791 :           ls.ls_eltype = ltype;
    8543              :         }
    8544              :       else
    8545         3404 :         gcc_assert (ls.ls_type == lvectype && ls.ls_eltype == ltype);
    8546              : 
    8547        29195 :       unsigned align;
    8548        29195 :       if (alignment_support_scheme == dr_aligned)
    8549         1241 :         align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
    8550              :       else
    8551        27954 :         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
    8552              :       /* Alignment is at most the access size if we do multiple stores.  */
    8553        29195 :       if (nstores > 1)
    8554        26256 :         align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
    8555        29195 :       ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
    8556        29195 :       int ncopies = vec_num;
    8557              : 
    8558        29195 :       if (!costing_p)
    8559              :         {
    8560         3404 :           ivstep = stride_step;
    8561         3404 :           ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
    8562              :                                 build_int_cst (TREE_TYPE (ivstep), vf));
    8563              : 
    8564         3404 :           standard_iv_increment_position (loop, &incr_gsi, &insert_after);
    8565              : 
    8566         3404 :           stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
    8567         3404 :           ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
    8568         3404 :           create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
    8569              :                      insert_after, &offvar, NULL);
    8570              : 
    8571         3404 :           stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
    8572              :         }
    8573              : 
    8574        29195 :       alias_off = build_int_cst (ref_type, 0);
    8575        29195 :       auto_vec<tree> vec_oprnds;
    8576              :       /* For costing some adjacent vector stores, we'd like to cost with
    8577              :          the total number of them once instead of costing each one by one.  */
    8578        29195 :       unsigned int n_adjacent_stores = 0;
    8579        29195 :       running_off = offvar;
    8580        29195 :       if (!costing_p)
    8581         3404 :         vect_get_slp_defs (op_node, &vec_oprnds);
    8582        29195 :       unsigned int group_el = 0;
    8583        29195 :       unsigned HOST_WIDE_INT elsz
    8584        29195 :         = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    8585        69636 :       for (j = 0; j < ncopies; j++)
    8586              :         {
    8587        40441 :           if (!costing_p)
    8588              :             {
    8589         5265 :               vec_oprnd = vec_oprnds[j];
    8590              :               /* Pun the vector to extract from if necessary.  */
    8591         5265 :               if (lvectype != vectype)
    8592              :                 {
    8593         1008 :                   tree tem = make_ssa_name (lvectype);
    8594         1008 :                   tree cvt = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
    8595         1008 :                   gimple *pun = gimple_build_assign (tem, cvt);
    8596         1008 :                   vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
    8597         1008 :                   vec_oprnd = tem;
    8598              :                 }
    8599              :             }
    8600       179845 :           for (i = 0; i < nstores; i++)
    8601              :             {
    8602       139404 :               if (costing_p)
    8603              :                 {
    8604       123355 :                   n_adjacent_stores++;
    8605       123355 :                   continue;
    8606              :                 }
    8607        16049 :               tree newref, newoff;
    8608        16049 :               gimple *incr, *assign;
    8609        16049 :               tree size = TYPE_SIZE (ltype);
    8610              :               /* Extract the i'th component.  */
    8611        16049 :               tree pos = fold_build2 (MULT_EXPR, bitsizetype,
    8612              :                                       bitsize_int (i), size);
    8613        16049 :               tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
    8614              :                                        size, pos);
    8615              : 
    8616        16049 :               elem = force_gimple_operand_gsi (gsi, elem, true, NULL_TREE, true,
    8617              :                                                GSI_SAME_STMT);
    8618              : 
    8619        16049 :               tree this_off = build_int_cst (TREE_TYPE (alias_off),
    8620        16049 :                                              group_el * elsz);
    8621        16049 :               newref = build2 (MEM_REF, ltype, running_off, this_off);
    8622        16049 :               vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
    8623              : 
    8624              :               /* And store it to *running_off.  */
    8625        16049 :               assign = gimple_build_assign (newref, elem);
    8626        16049 :               vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
    8627              : 
    8628        16049 :               group_el += lnel;
    8629        16049 :               if (group_el == group_size)
    8630              :                 {
    8631        14412 :                   newoff = copy_ssa_name (running_off, NULL);
    8632        14412 :                   incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
    8633              :                                               running_off, stride_step);
    8634        14412 :                   vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
    8635              : 
    8636        14412 :                   running_off = newoff;
    8637        14412 :                   group_el = 0;
    8638              :                 }
    8639              :             }
    8640              :         }
    8641              : 
    8642        29195 :       if (costing_p)
    8643              :         {
    8644        25791 :           if (n_adjacent_stores > 0)
    8645              :             {
    8646              :               /* Take a single lane vector type store as scalar
    8647              :                  store to avoid ICE like 110776.  */
    8648        25791 :               if (VECTOR_TYPE_P (ltype)
    8649        25791 :                   && maybe_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
    8650         1612 :                 vect_get_store_cost (vinfo, stmt_info, slp_node,
    8651              :                                      n_adjacent_stores, alignment_support_scheme,
    8652              :                                      misalignment, &inside_cost, cost_vec);
    8653              :               else
    8654        24179 :                 inside_cost
    8655        24179 :                   += record_stmt_cost (cost_vec, n_adjacent_stores,
    8656              :                                        scalar_store, slp_node, 0, vect_body);
    8657              :               /* Only need vector deconstruction when there is more
    8658              :                  than one store.  */
    8659        25791 :               if (nstores > 1)
    8660        23763 :                 inside_cost
    8661        23763 :                   += record_stmt_cost (cost_vec, ncopies,
    8662              :                                        vec_deconstruct, slp_node, 0, vect_body);
    8663              :             }
    8664        25791 :           if (dump_enabled_p ())
    8665          676 :             dump_printf_loc (MSG_NOTE, vect_location,
    8666              :                              "vect_model_store_cost: inside_cost = %d, "
    8667              :                              "prologue_cost = %d .\n",
    8668              :                              inside_cost, prologue_cost);
    8669              : 
    8670        25791 :           SLP_TREE_TYPE (slp_node) = store_vec_info_type;
    8671        25791 :           slp_node->data = new vect_load_store_data (std::move (ls));
    8672              :         }
    8673              : 
    8674        29195 :       return true;
    8675        29195 :     }
    8676              : 
    8677      1330028 :   gcc_assert (alignment_support_scheme);
    8678      1330028 :   vec_loop_masks *loop_masks
    8679       195617 :     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
    8680      1330028 :        ? &LOOP_VINFO_MASKS (loop_vinfo)
    8681           11 :        : NULL);
    8682           11 :   vec_loop_lens *loop_lens
    8683       195617 :     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
    8684              :        ? &LOOP_VINFO_LENS (loop_vinfo)
    8685            0 :        : NULL);
    8686              : 
    8687              :   /* Both vect_transform_stmt and vect_analyze_stmt reach this point, but
    8688              :      they differ here.  We cannot enable both the lens and masks during
    8689              :      transform, but it is allowed during analysis.
    8690              :      Shouldn't go with length-based approach if fully masked.  */
    8691      1330028 :   if (cost_vec == NULL)
    8692              :     /* The cost_vec is NULL during transform.  */
    8693       541806 :     gcc_assert ((!loop_lens || !loop_masks));
    8694              : 
    8695              :   /* Targets with store-lane instructions must not require explicit
    8696              :      realignment.  vect_supportable_dr_alignment always returns either
    8697              :      dr_aligned or dr_unaligned_supported for masked operations.  */
    8698      1330028 :   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
    8699              :                && !mask_node
    8700              :                && !loop_masks)
    8701              :               || alignment_support_scheme == dr_aligned
    8702              :               || alignment_support_scheme == dr_unaligned_supported);
    8703              : 
    8704      1330028 :   tree offset = NULL_TREE;
    8705      1330028 :   if (!known_eq (poffset, 0))
    8706         4651 :     offset = size_int (poffset);
    8707              : 
    8708      1330028 :   tree bump;
    8709      1330028 :   tree vec_offset = NULL_TREE;
    8710      1330028 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    8711              :     {
    8712         1460 :       aggr_type = NULL_TREE;
    8713         1460 :       bump = NULL_TREE;
    8714              :     }
    8715      1328568 :   else if (mat_gather_scatter_p (memory_access_type))
    8716              :     {
    8717            0 :       aggr_type = elem_type;
    8718            0 :       if (!costing_p)
    8719              :         {
    8720            0 :           tree vtype = ls.ls_type ? ls.ls_type : vectype;
    8721            0 :           vect_get_strided_load_store_ops (stmt_info, slp_node, vtype,
    8722              :                                            ls.strided_offset_vectype,
    8723              :                                            loop_vinfo, gsi,
    8724              :                                            &bump, &vec_offset, loop_lens);
    8725              :         }
    8726              :     }
    8727              :   else
    8728              :     {
    8729      1328568 :       if (memory_access_type == VMAT_LOAD_STORE_LANES)
    8730            0 :         aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
    8731              :       else
    8732              :         aggr_type = vectype;
    8733      1328568 :       if (!costing_p)
    8734       541333 :         bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
    8735              :                                             memory_access_type, loop_lens);
    8736              :     }
    8737              : 
    8738      1330028 :   if (loop_vinfo && mask_node && !costing_p)
    8739          544 :     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
    8740              : 
    8741              :   /* In case the vectorization factor (VF) is bigger than the number
    8742              :      of elements that we can fit in a vectype (nunits), we have to generate
    8743              :      more than one vector stmt - i.e - we need to "unroll" the
    8744              :      vector stmt by a factor VF/nunits.  */
    8745              : 
    8746      1330028 :   auto_vec<tree> dr_chain (group_size);
    8747      1330028 :   auto_vec<tree> vec_masks;
    8748      1330028 :   tree vec_mask = NULL;
    8749      1330028 :   auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
    8750      5993859 :   for (i = 0; i < group_size; i++)
    8751      3333803 :     gvec_oprnds.quick_push (new auto_vec<tree> ());
    8752              : 
    8753      1330028 :   if (memory_access_type == VMAT_LOAD_STORE_LANES)
    8754              :     {
    8755            0 :       const internal_fn lanes_ifn = ls.lanes_ifn;
    8756              : 
    8757            0 :       if (costing_p)
    8758              :         /* Update all incoming store operand nodes, the general handling
    8759              :            above only handles the mask and the first store operand node.  */
    8760            0 :         for (slp_tree child : SLP_TREE_CHILDREN (slp_node))
    8761            0 :           if (child != mask_node
    8762            0 :               && !vect_maybe_update_slp_op_vectype (child, vectype))
    8763              :             {
    8764            0 :               if (dump_enabled_p ())
    8765            0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    8766              :                                  "incompatible vector types for invariants\n");
    8767            0 :               return false;
    8768              :             }
    8769            0 :       unsigned inside_cost = 0, prologue_cost = 0;
    8770              :       /* For costing some adjacent vector stores, we'd like to cost with
    8771              :          the total number of them once instead of costing each one by one.  */
    8772            0 :       unsigned int n_adjacent_stores = 0;
    8773            0 :       int ncopies = vec_num / group_size;
    8774            0 :       for (j = 0; j < ncopies; j++)
    8775              :         {
    8776            0 :           if (j == 0)
    8777              :             {
    8778            0 :               if (!costing_p)
    8779              :                 {
    8780            0 :                   if (mask_node)
    8781              :                     {
    8782            0 :                       vect_get_slp_defs (mask_node, &vec_masks);
    8783            0 :                       vec_mask = vec_masks[0];
    8784              :                     }
    8785            0 :                   dataref_ptr
    8786            0 :                     = vect_create_data_ref_ptr (vinfo, first_stmt_info,
    8787              :                                                 aggr_type, NULL, offset, &dummy,
    8788              :                                                 gsi, &ptr_incr, false, bump);
    8789              :                 }
    8790              :             }
    8791            0 :           else if (!costing_p)
    8792              :             {
    8793            0 :               gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
    8794            0 :               if (mask_node)
    8795            0 :                 vec_mask = vec_masks[j];
    8796            0 :               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
    8797              :                                              stmt_info, bump);
    8798              :             }
    8799              : 
    8800            0 :           if (costing_p)
    8801              :             {
    8802            0 :               n_adjacent_stores += group_size;
    8803            0 :               continue;
    8804              :             }
    8805              : 
    8806              :           /* Get an array into which we can store the individual vectors.  */
    8807            0 :           tree vec_array = create_vector_array (vectype, group_size);
    8808              : 
    8809              :           /* Invalidate the current contents of VEC_ARRAY.  This should
    8810              :              become an RTL clobber too, which prevents the vector registers
    8811              :              from being upward-exposed.  */
    8812            0 :           vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
    8813              : 
    8814              :           /* Store the individual vectors into the array.  */
    8815            0 :           for (i = 0; i < group_size; i++)
    8816              :             {
    8817            0 :               slp_tree child;
    8818            0 :               if (i == 0 || !mask_node)
    8819            0 :                 child = SLP_TREE_CHILDREN (slp_node)[i];
    8820              :               else
    8821            0 :                 child = SLP_TREE_CHILDREN (slp_node)[i + 1];
    8822            0 :               vec_oprnd = SLP_TREE_VEC_DEFS (child)[j];
    8823            0 :               write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
    8824              :                                   i);
    8825              :             }
    8826              : 
    8827            0 :           tree final_mask = NULL;
    8828            0 :           tree final_len = NULL;
    8829            0 :           tree bias = NULL;
    8830            0 :           if (loop_masks)
    8831            0 :             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
    8832              :                                              ncopies, vectype, j);
    8833            0 :           if (vec_mask)
    8834            0 :             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
    8835              :                                            vec_mask, gsi);
    8836              : 
    8837            0 :           if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
    8838              :             {
    8839            0 :               if (loop_lens)
    8840            0 :                 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
    8841              :                                                ncopies, vectype, j, 1, true);
    8842              :               else
    8843            0 :                 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
    8844            0 :               signed char biasval
    8845            0 :                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
    8846            0 :               bias = build_int_cst (intQI_type_node, biasval);
    8847            0 :               if (!final_mask)
    8848              :                 {
    8849            0 :                   mask_vectype = truth_type_for (vectype);
    8850            0 :                   final_mask = build_minus_one_cst (mask_vectype);
    8851              :                 }
    8852              :             }
    8853              : 
    8854            0 :           gcall *call;
    8855            0 :           if (final_len && final_mask)
    8856              :             {
    8857              :               /* Emit:
    8858              :                    MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
    8859              :                                          LEN, BIAS, VEC_ARRAY).  */
    8860            0 :               unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
    8861            0 :               tree alias_ptr = build_int_cst (ref_type, align);
    8862            0 :               call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
    8863              :                                                  dataref_ptr, alias_ptr,
    8864              :                                                  final_mask, final_len, bias,
    8865              :                                                  vec_array);
    8866              :             }
    8867            0 :           else if (final_mask)
    8868              :             {
    8869              :               /* Emit:
    8870              :                    MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
    8871              :                                      VEC_ARRAY).  */
    8872            0 :               unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
    8873            0 :               tree alias_ptr = build_int_cst (ref_type, align);
    8874            0 :               call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
    8875              :                                                  dataref_ptr, alias_ptr,
    8876              :                                                  final_mask, vec_array);
    8877              :             }
    8878              :           else
    8879              :             {
    8880              :               /* Emit:
    8881              :                    MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
    8882            0 :               data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
    8883            0 :               call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
    8884            0 :               gimple_call_set_lhs (call, data_ref);
    8885              :             }
    8886            0 :           gimple_call_set_nothrow (call, true);
    8887            0 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    8888              : 
    8889              :           /* Record that VEC_ARRAY is now dead.  */
    8890            0 :           vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
    8891              :         }
    8892              : 
    8893            0 :       if (costing_p)
    8894              :         {
    8895            0 :           if (n_adjacent_stores > 0)
    8896            0 :             vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
    8897              :                                  alignment_support_scheme, misalignment,
    8898              :                                  &inside_cost, cost_vec);
    8899            0 :           if (dump_enabled_p ())
    8900            0 :             dump_printf_loc (MSG_NOTE, vect_location,
    8901              :                              "vect_model_store_cost: inside_cost = %d, "
    8902              :                              "prologue_cost = %d .\n",
    8903              :                              inside_cost, prologue_cost);
    8904              : 
    8905            0 :           SLP_TREE_TYPE (slp_node) = store_vec_info_type;
    8906            0 :           slp_node->data = new vect_load_store_data (std::move (ls));
    8907              :         }
    8908              : 
    8909            0 :       return true;
    8910              :     }
    8911              : 
    8912      1330028 :   if (mat_gather_scatter_p (memory_access_type))
    8913              :     {
    8914         1460 :       gcc_assert (!grouped_store || ls.ls_type);
    8915         1460 :       if (ls.ls_type)
    8916            0 :         vectype = ls.ls_type;
    8917         1460 :       auto_vec<tree> vec_offsets;
    8918         1460 :       unsigned int inside_cost = 0, prologue_cost = 0;
    8919         1460 :       int num_stmts = vec_num;
    8920         3319 :       for (j = 0; j < num_stmts; j++)
    8921              :         {
    8922         1859 :           gimple *new_stmt;
    8923         1859 :           if (j == 0)
    8924              :             {
    8925         1460 :               if (costing_p && vls_type == VLS_STORE_INVARIANT)
    8926          210 :                 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
    8927              :                                                    slp_node, 0, vect_prologue);
    8928              :               else if (!costing_p)
    8929              :                 {
    8930              :                   /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
    8931              :                      DR_CHAIN is of size 1.  */
    8932          473 :                   gcc_assert (group_size == 1);
    8933          473 :                   vect_get_slp_defs (op_node, gvec_oprnds[0]);
    8934          473 :                   if (mask_node)
    8935           70 :                     vect_get_slp_defs (mask_node, &vec_masks);
    8936              : 
    8937          473 :                   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    8938          473 :                     vect_get_gather_scatter_ops (loop, slp_node,
    8939              :                                                  &dataref_ptr, &vec_offsets);
    8940              :                   else
    8941            0 :                     dataref_ptr
    8942            0 :                       = vect_create_data_ref_ptr (vinfo, first_stmt_info,
    8943              :                                                   aggr_type, NULL, offset,
    8944              :                                                   &dummy, gsi, &ptr_incr, false,
    8945              :                                                   bump);
    8946              :                 }
    8947              :             }
    8948          399 :           else if (!costing_p)
    8949              :             {
    8950           35 :               gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
    8951           35 :               if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    8952            0 :                 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
    8953              :                                                gsi, stmt_info, bump);
    8954              :             }
    8955              : 
    8956         2577 :           new_stmt = NULL;
    8957          718 :           if (!costing_p)
    8958              :             {
    8959          508 :               vec_oprnd = (*gvec_oprnds[0])[j];
    8960          508 :               if (mask_node)
    8961           90 :                 vec_mask = vec_masks[j];
    8962              :               /* We should have caught mismatched types earlier.  */
    8963          508 :               gcc_assert (ls.ls_type
    8964              :                           || useless_type_conversion_p
    8965              :                           (vectype, TREE_TYPE (vec_oprnd)));
    8966              :             }
    8967          508 :           tree final_mask = NULL_TREE;
    8968         2367 :           tree final_len = NULL_TREE;
    8969         2367 :           tree bias = NULL_TREE;
    8970          508 :           if (!costing_p)
    8971              :             {
    8972          508 :               if (loop_masks)
    8973            0 :                 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
    8974              :                                                  loop_masks, num_stmts,
    8975              :                                                  vectype, j);
    8976          508 :               if (vec_mask)
    8977           90 :                 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
    8978              :                                                final_mask, vec_mask, gsi);
    8979              :             }
    8980              : 
    8981         1859 :           unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
    8982         1859 :           tree alias_align_ptr = build_int_cst (ref_type, align);
    8983         1859 :           if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
    8984              :             {
    8985            0 :               if (costing_p)
    8986              :                 {
    8987            0 :                   if (ls.supported_offset_vectype
    8988            0 :                       && !tree_nop_conversion_p (ls.supported_offset_vectype,
    8989              :                                                  vec_offset))
    8990            0 :                     inside_cost
    8991            0 :                       += record_stmt_cost (cost_vec, 1, vector_stmt,
    8992              :                                            slp_node, 0, vect_body);
    8993            0 :                   if (ls.supported_scale)
    8994            0 :                     inside_cost
    8995            0 :                       += record_stmt_cost (cost_vec, 1, vector_stmt,
    8996              :                                            slp_node, 0, vect_body);
    8997              : 
    8998            0 :                   unsigned int cnunits = vect_nunits_for_cost (vectype);
    8999            0 :                   inside_cost
    9000            0 :                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
    9001              :                                          slp_node, 0, vect_body);
    9002         1859 :                   continue;
    9003            0 :                 }
    9004              : 
    9005            0 :               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    9006            0 :                 vec_offset = vec_offsets[j];
    9007              : 
    9008            0 :               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
    9009            0 :               bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
    9010              : 
    9011              :               /* Perform the offset conversion and scaling if necessary.  */
    9012            0 :               if (!strided
    9013            0 :                   && (ls.supported_offset_vectype || ls.supported_scale))
    9014              :                 {
    9015            0 :                   gimple_seq stmts = NULL;
    9016            0 :                   if (ls.supported_offset_vectype)
    9017            0 :                     vec_offset = gimple_convert
    9018            0 :                       (&stmts, ls.supported_offset_vectype, vec_offset);
    9019            0 :                   if (ls.supported_scale)
    9020              :                     {
    9021              :                       /* Only scale the vec_offset if we haven't already.  */
    9022            0 :                       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
    9023            0 :                           || j == 0)
    9024              :                         {
    9025            0 :                           tree mult_cst = build_int_cst
    9026            0 :                             (TREE_TYPE (TREE_TYPE (vec_offset)),
    9027            0 :                              SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
    9028            0 :                           tree mult = build_vector_from_val
    9029            0 :                             (TREE_TYPE (vec_offset), mult_cst);
    9030            0 :                           vec_offset = gimple_build
    9031            0 :                             (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
    9032              :                              vec_offset, mult);
    9033              :                         }
    9034            0 :                       scale = size_int (ls.supported_scale);
    9035              :                     }
    9036            0 :                   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    9037              :                 }
    9038              : 
    9039            0 :               if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
    9040              :                 {
    9041            0 :                   if (loop_lens)
    9042            0 :                     final_len = vect_get_loop_len (loop_vinfo, gsi,
    9043              :                                                    loop_lens, num_stmts,
    9044              :                                                    vectype, j, 1, true);
    9045              :                   else
    9046            0 :                     final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
    9047              : 
    9048            0 :                   signed char biasval
    9049            0 :                     = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
    9050            0 :                   bias = build_int_cst (intQI_type_node, biasval);
    9051            0 :                   if (!final_mask)
    9052              :                     {
    9053            0 :                       mask_vectype = truth_type_for (vectype);
    9054            0 :                       final_mask = build_minus_one_cst (mask_vectype);
    9055              :                     }
    9056              :                 }
    9057              : 
    9058            0 :               if (ls.ls_type)
    9059              :                 {
    9060            0 :                   gimple *conv_stmt
    9061            0 :                     = gimple_build_assign (make_ssa_name (vectype),
    9062              :                                            VIEW_CONVERT_EXPR,
    9063              :                                            build1 (VIEW_CONVERT_EXPR, vectype,
    9064              :                                                    vec_oprnd));
    9065            0 :                   vect_finish_stmt_generation (vinfo, stmt_info, conv_stmt,
    9066              :                                                gsi);
    9067            0 :                   vec_oprnd = gimple_get_lhs (conv_stmt);
    9068              :                 }
    9069              : 
    9070            0 :               gcall *call;
    9071            0 :               if (final_len && final_mask)
    9072              :                 {
    9073            0 :                   if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
    9074            0 :                     call = gimple_build_call_internal (
    9075              :                             IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
    9076              :                             alias_align_ptr,
    9077              :                             vec_offset, scale, vec_oprnd, final_mask, final_len,
    9078              :                             bias);
    9079              :                   else
    9080              :                     /* A non-vector offset indicates that we prefer to use
    9081              :                        MASK_LEN_STRIDED_STORE instead of the
    9082              :                        IFN_MASK_SCATTER_STORE with direct stride arg.
    9083              :                        Similar to the gather case we have checked the
    9084              :                        alignment for a scatter already and assume
    9085              :                        that the strided store has the same requirements.  */
    9086            0 :                     call = gimple_build_call_internal (
    9087              :                             IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
    9088              :                             vec_offset, vec_oprnd, final_mask, final_len, bias);
    9089              :                 }
    9090            0 :               else if (final_mask)
    9091            0 :                 call = gimple_build_call_internal
    9092            0 :                              (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
    9093              :                               alias_align_ptr,
    9094              :                               vec_offset, scale, vec_oprnd, final_mask);
    9095              :               else
    9096            0 :                 call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
    9097              :                                                    dataref_ptr,
    9098              :                                                    alias_align_ptr,
    9099              :                                                    vec_offset,
    9100              :                                                    scale, vec_oprnd);
    9101            0 :               gimple_call_set_nothrow (call, true);
    9102            0 :               vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    9103            0 :               new_stmt = call;
    9104              :             }
    9105         1859 :           else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
    9106              :             {
    9107              :               /* The builtin decls path for scatter is legacy, x86 only.  */
    9108          330 :               gcc_assert (nunits.is_constant ()
    9109              :                           && (!final_mask
    9110              :                               || SCALAR_INT_MODE_P
    9111              :                                    (TYPE_MODE (TREE_TYPE (final_mask)))));
    9112          330 :               if (costing_p)
    9113              :                 {
    9114          199 :                   unsigned int cnunits = vect_nunits_for_cost (vectype);
    9115          199 :                   inside_cost
    9116          199 :                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
    9117              :                                          slp_node, 0, vect_body);
    9118          199 :                   continue;
    9119          199 :                 }
    9120              : 
    9121          131 :                 tree offset_vectype = TREE_TYPE (vec_offsets[0]);
    9122          131 :                 poly_uint64 offset_nunits
    9123          131 :                   = TYPE_VECTOR_SUBPARTS (offset_vectype);
    9124          131 :                 if (known_eq (nunits, offset_nunits))
    9125              :                   {
    9126           55 :                     new_stmt = vect_build_one_scatter_store_call
    9127          110 :                                    (vinfo, stmt_info, slp_node, gsi,
    9128           55 :                                     ls.gs.decl, dataref_ptr, vec_offsets[j],
    9129              :                                     vec_oprnd, final_mask);
    9130           55 :                     vect_finish_stmt_generation (vinfo, stmt_info,
    9131              :                                                  new_stmt, gsi);
    9132              :                   }
    9133           76 :                 else if (known_eq (nunits, offset_nunits * 2))
    9134              :                   {
    9135              :                     /* We have an offset vector with half the number of
    9136              :                        lanes but the builtins will store full vectype
    9137              :                        data from the lower lanes.  */
    9138           30 :                     new_stmt = vect_build_one_scatter_store_call
    9139           60 :                                    (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
    9140           30 :                                     dataref_ptr, vec_offsets[2 * j],
    9141              :                                     vec_oprnd, final_mask);
    9142           30 :                     vect_finish_stmt_generation (vinfo, stmt_info,
    9143              :                                                    new_stmt, gsi);
    9144           30 :                     int count = nunits.to_constant ();
    9145           30 :                     vec_perm_builder sel (count, count, 1);
    9146           30 :                     sel.quick_grow (count);
    9147          382 :                     for (int i = 0; i < count; ++i)
    9148          352 :                       sel[i] = i | (count / 2);
    9149           30 :                     vec_perm_indices indices (sel, 2, count);
    9150           30 :                     tree perm_mask
    9151           30 :                       = vect_gen_perm_mask_checked (vectype, indices);
    9152           30 :                     new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
    9153              :                                                     vec_oprnd, vec_oprnd,
    9154              :                                                     perm_mask);
    9155           30 :                     vec_oprnd = make_ssa_name (vectype);
    9156           30 :                     gimple_set_lhs (new_stmt, vec_oprnd);
    9157           30 :                     vect_finish_stmt_generation (vinfo, stmt_info,
    9158              :                                                  new_stmt, gsi);
    9159           30 :                     if (final_mask)
    9160              :                       {
    9161           20 :                         new_stmt = gimple_build_assign (NULL_TREE,
    9162              :                                                         VEC_UNPACK_HI_EXPR,
    9163              :                                                         final_mask);
    9164           20 :                         final_mask = make_ssa_name
    9165           20 :                                       (truth_type_for (offset_vectype));
    9166           20 :                         gimple_set_lhs (new_stmt, final_mask);
    9167           20 :                         vect_finish_stmt_generation (vinfo, stmt_info,
    9168              :                                                      new_stmt, gsi);
    9169              :                         }
    9170              : 
    9171           30 :                     new_stmt = vect_build_one_scatter_store_call
    9172           60 :                                   (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
    9173           30 :                                    dataref_ptr, vec_offsets[2 * j + 1],
    9174              :                                    vec_oprnd, final_mask);
    9175           30 :                     vect_finish_stmt_generation (vinfo, stmt_info,
    9176              :                                                  new_stmt, gsi);
    9177           30 :                   }
    9178           46 :                 else if (known_eq (nunits * 2, offset_nunits))
    9179              :                   {
    9180              :                     /* We have an offset vector with double the number of
    9181              :                        lanes.  Select the low/high part accordingly.  */
    9182           46 :                     vec_offset = vec_offsets[j / 2];
    9183           46 :                     if (j & 1)
    9184              :                       {
    9185           23 :                         int count = offset_nunits.to_constant ();
    9186           23 :                         vec_perm_builder sel (count, count, 1);
    9187           23 :                         sel.quick_grow (count);
    9188          263 :                         for (int i = 0; i < count; ++i)
    9189          240 :                           sel[i] = i | (count / 2);
    9190           23 :                         vec_perm_indices indices (sel, 2, count);
    9191           23 :                         tree perm_mask = vect_gen_perm_mask_checked
    9192           23 :                                            (TREE_TYPE (vec_offset), indices);
    9193           23 :                         new_stmt = gimple_build_assign (NULL_TREE,
    9194              :                                                         VEC_PERM_EXPR,
    9195              :                                                         vec_offset,
    9196              :                                                         vec_offset,
    9197              :                                                         perm_mask);
    9198           23 :                         vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
    9199           23 :                         gimple_set_lhs (new_stmt, vec_offset);
    9200           23 :                         vect_finish_stmt_generation (vinfo, stmt_info,
    9201              :                                                      new_stmt, gsi);
    9202           23 :                       }
    9203              : 
    9204           46 :                     new_stmt = vect_build_one_scatter_store_call
    9205           46 :                                    (vinfo, stmt_info, slp_node, gsi,
    9206              :                                     ls.gs.decl, dataref_ptr, vec_offset,
    9207              :                                     vec_oprnd, final_mask);
    9208           46 :                     vect_finish_stmt_generation (vinfo, stmt_info,
    9209              :                                                  new_stmt, gsi);
    9210              :                   }
    9211              :                 else
    9212            0 :                   gcc_unreachable ();
    9213              :             }
    9214              :           else
    9215              :             {
    9216              :               /* Emulated scatter.  */
    9217         1529 :               gcc_assert (!final_mask);
    9218         1529 :               if (costing_p)
    9219              :                 {
    9220         1152 :                   unsigned int cnunits = vect_nunits_for_cost (vectype);
    9221              :                   /* For emulated scatter N offset vector element extracts
    9222              :                      (we assume the scalar scaling and ptr + offset add is
    9223              :                      consumed by the store).  */
    9224         1152 :                   inside_cost
    9225         1152 :                     += record_stmt_cost (cost_vec, 1, vec_deconstruct,
    9226              :                                          slp_node, 0, vect_body);
    9227              :                   /* N scalar stores plus extracting the elements.  */
    9228         1152 :                   inside_cost
    9229         1152 :                     += record_stmt_cost (cost_vec, 1, vec_deconstruct,
    9230              :                                          slp_node, 0, vect_body);
    9231         1152 :                   inside_cost
    9232         1152 :                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
    9233              :                                          slp_node, 0, vect_body);
    9234         1152 :                   continue;
    9235         1152 :                 }
    9236              : 
    9237          377 :               tree offset_vectype = TREE_TYPE (vec_offsets[0]);
    9238          377 :               unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
    9239          377 :               unsigned HOST_WIDE_INT const_offset_nunits
    9240          377 :                 = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
    9241          377 :               vec<constructor_elt, va_gc> *ctor_elts;
    9242          377 :               vec_alloc (ctor_elts, const_nunits);
    9243          377 :               gimple_seq stmts = NULL;
    9244          377 :               tree elt_type = TREE_TYPE (vectype);
    9245          377 :               unsigned HOST_WIDE_INT elt_size
    9246          377 :                 = tree_to_uhwi (TYPE_SIZE (elt_type));
    9247              :               /* We support offset vectors with more elements
    9248              :                  than the data vector for now.  */
    9249          377 :               unsigned HOST_WIDE_INT factor
    9250              :                 = const_offset_nunits / const_nunits;
    9251          377 :               vec_offset = vec_offsets[j / factor];
    9252          377 :               unsigned elt_offset
    9253          377 :                 = (j % factor) * const_nunits;
    9254          377 :               tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
    9255          377 :               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
    9256          377 :               tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
    9257         1531 :               for (unsigned k = 0; k < const_nunits; ++k)
    9258              :                 {
    9259              :                   /* Compute the offsetted pointer.  */
    9260         1154 :                   tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
    9261              :                                           bitsize_int (k + elt_offset));
    9262         1154 :                   tree idx
    9263         2308 :                     = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
    9264         1154 :                                     vec_offset, TYPE_SIZE (idx_type), boff);
    9265         1154 :                   idx = gimple_convert (&stmts, sizetype, idx);
    9266         1154 :                   idx = gimple_build (&stmts, MULT_EXPR, sizetype,
    9267              :                                       idx, scale);
    9268         1154 :                   tree ptr
    9269         1154 :                     = gimple_build (&stmts, PLUS_EXPR,
    9270         1154 :                                     TREE_TYPE (dataref_ptr),
    9271              :                                     dataref_ptr, idx);
    9272         1154 :                   ptr = gimple_convert (&stmts, ptr_type_node, ptr);
    9273              :                   /* Extract the element to be stored.  */
    9274         1154 :                   tree elt
    9275         2308 :                     = gimple_build (&stmts, BIT_FIELD_REF,
    9276         1154 :                                     TREE_TYPE (vectype),
    9277         1154 :                                     vec_oprnd, TYPE_SIZE (elt_type),
    9278         1154 :                                     bitsize_int (k * elt_size));
    9279         1154 :                   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    9280         1154 :                   stmts = NULL;
    9281         1154 :                   tree ref
    9282         1154 :                     = build2 (MEM_REF, ltype, ptr,
    9283              :                               build_int_cst (ref_type, 0));
    9284         1154 :                   new_stmt = gimple_build_assign (ref, elt);
    9285         1154 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    9286              :                 }
    9287              : 
    9288          377 :               slp_node->push_vec_def (new_stmt);
    9289              :             }
    9290              :         }
    9291              : 
    9292         1460 :       if (costing_p)
    9293              :         {
    9294          987 :           if (dump_enabled_p ())
    9295           78 :             dump_printf_loc (MSG_NOTE, vect_location,
    9296              :                              "vect_model_store_cost: inside_cost = %d, "
    9297              :                              "prologue_cost = %d .\n",
    9298              :                              inside_cost, prologue_cost);
    9299          987 :           SLP_TREE_TYPE (slp_node) = store_vec_info_type;
    9300          987 :           slp_node->data = new vect_load_store_data (std::move (ls));
    9301              :         }
    9302              : 
    9303         1460 :       return true;
    9304         1460 :     }
    9305              : 
    9306      1328568 :   gcc_assert (memory_access_type == VMAT_CONTIGUOUS
    9307              :               || memory_access_type == VMAT_CONTIGUOUS_DOWN
    9308              :               || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
    9309              : 
    9310      1328568 :   unsigned inside_cost = 0, prologue_cost = 0;
    9311              :   /* For costing some adjacent vector stores, we'd like to cost with
    9312              :      the total number of them once instead of costing each one by one.  */
    9313      1328568 :   unsigned int n_adjacent_stores = 0;
    9314      1328568 :   auto_vec<tree> result_chain (group_size);
    9315      1328568 :   auto_vec<tree, 1> vec_oprnds;
    9316      1328568 :   gimple *new_stmt;
    9317      1328568 :   if (!costing_p)
    9318              :     {
    9319              :       /* Get vectorized arguments for SLP_NODE.  */
    9320       541333 :       vect_get_slp_defs (op_node, &vec_oprnds);
    9321       541333 :       vec_oprnd = vec_oprnds[0];
    9322       541333 :       if (mask_node)
    9323              :         {
    9324          474 :           vect_get_slp_defs (mask_node, &vec_masks);
    9325          474 :           vec_mask = vec_masks[0];
    9326              :         }
    9327              :     }
    9328              : 
    9329              :   /* We should have caught mismatched types earlier.  */
    9330       541333 :   gcc_assert (costing_p
    9331              :               || useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
    9332      1328568 :   bool simd_lane_access_p
    9333      1328568 :       = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
    9334      1328568 :   if (!costing_p
    9335      1328568 :       && simd_lane_access_p
    9336         4374 :       && !loop_masks
    9337         4374 :       && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
    9338         4374 :       && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
    9339         4374 :       && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
    9340         4374 :       && integer_zerop (DR_INIT (first_dr_info->dr))
    9341      1332942 :       && alias_sets_conflict_p (get_alias_set (aggr_type),
    9342         4374 :                                 get_alias_set (TREE_TYPE (ref_type))))
    9343              :     {
    9344         4366 :       dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
    9345         4366 :       dataref_offset = build_int_cst (ref_type, 0);
    9346              :     }
    9347      1324202 :   else if (!costing_p)
    9348      1073926 :     dataref_ptr = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
    9349              :                                             simd_lane_access_p ? loop : NULL,
    9350              :                                             offset, &dummy, gsi, &ptr_incr,
    9351              :                                             simd_lane_access_p, bump);
    9352              : 
    9353      1328568 :   new_stmt = NULL;
    9354      1328568 :   gcc_assert (!grouped_store);
    9355      2953517 :   for (i = 0; i < vec_num; i++)
    9356              :     {
    9357      1624949 :       if (!costing_p)
    9358       670460 :         vec_oprnd = vec_oprnds[i];
    9359              : 
    9360      1624949 :       if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    9361              :         {
    9362         3331 :           if (costing_p)
    9363         2191 :             inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
    9364              :                                              slp_node, 0, vect_body);
    9365              :           else
    9366              :             {
    9367         1140 :               tree perm_mask = perm_mask_for_reverse (vectype);
    9368         1140 :               tree new_temp = make_ssa_name (vectype);
    9369              : 
    9370              :               /* Generate the permute statement.  */
    9371         1140 :               gimple *perm_stmt
    9372         1140 :                 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
    9373              :                                        vec_oprnd, perm_mask);
    9374         1140 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    9375              : 
    9376         1140 :               perm_stmt = SSA_NAME_DEF_STMT (new_temp);
    9377      1624949 :               vec_oprnd = new_temp;
    9378              :             }
    9379              :         }
    9380              : 
    9381      1624949 :       if (costing_p)
    9382              :         {
    9383       954489 :           n_adjacent_stores++;
    9384       954489 :           continue;
    9385              :         }
    9386              : 
    9387       670460 :       tree final_mask = NULL_TREE;
    9388       670460 :       tree final_len = NULL_TREE;
    9389       670460 :       tree bias = NULL_TREE;
    9390       670460 :       if (loop_masks)
    9391           77 :         final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
    9392              :                                          vec_num, vectype, i);
    9393       670460 :       if (vec_mask)
    9394          695 :         vec_mask = vec_masks[i];
    9395          695 :       if (vec_mask)
    9396          695 :         final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
    9397              :                                        vec_mask, gsi);
    9398              : 
    9399       670460 :       if (i > 0)
    9400              :         /* Bump the vector pointer.  */
    9401       129127 :         dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
    9402              :                                        stmt_info, bump);
    9403              : 
    9404       670460 :       unsigned misalign;
    9405       670460 :       unsigned HOST_WIDE_INT align;
    9406       670460 :       align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
    9407       670460 :       if (alignment_support_scheme == dr_aligned)
    9408              :         misalign = 0;
    9409       308368 :       else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
    9410              :         {
    9411       160848 :           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
    9412       160848 :           misalign = 0;
    9413              :         }
    9414              :       else
    9415       147520 :         misalign = misalignment;
    9416       670460 :       if (dataref_offset == NULL_TREE
    9417       665080 :           && TREE_CODE (dataref_ptr) == SSA_NAME)
    9418       182323 :         set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign);
    9419       670460 :       align = least_bit_hwi (misalign | align);
    9420              : 
    9421              :       /* Compute IFN when LOOP_LENS or final_mask is valid.  */
    9422       670460 :       machine_mode vmode = TYPE_MODE (vectype);
    9423       670460 :       machine_mode new_vmode = vmode;
    9424       670460 :       internal_fn partial_ifn = IFN_LAST;
    9425       670460 :       if (loop_lens)
    9426              :         {
    9427            0 :           opt_machine_mode new_ovmode
    9428            0 :             = get_len_load_store_mode (vmode, false, &partial_ifn);
    9429            0 :           new_vmode = new_ovmode.require ();
    9430            0 :           unsigned factor
    9431            0 :             = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
    9432            0 :           final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
    9433              :                                          vec_num, vectype, i, factor, true);
    9434              :         }
    9435       670460 :       else if (final_mask)
    9436              :         {
    9437          707 :           if (!can_vec_mask_load_store_p (vmode,
    9438          707 :                                           TYPE_MODE (TREE_TYPE (final_mask)),
    9439              :                                           false, &partial_ifn))
    9440            0 :             gcc_unreachable ();
    9441              :         }
    9442              : 
    9443       670460 :       if (partial_ifn == IFN_MASK_LEN_STORE)
    9444              :         {
    9445            0 :           if (!final_len)
    9446              :             {
    9447              :               /* Pass VF value to 'len' argument of
    9448              :                  MASK_LEN_STORE if LOOP_LENS is invalid.  */
    9449            0 :               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
    9450              :             }
    9451            0 :           if (!final_mask)
    9452              :             {
    9453              :               /* Pass all ones value to 'mask' argument of
    9454              :                  MASK_LEN_STORE if final_mask is invalid.  */
    9455            0 :               mask_vectype = truth_type_for (vectype);
    9456            0 :               final_mask = build_minus_one_cst (mask_vectype);
    9457              :             }
    9458              :         }
    9459       670460 :       if (final_len)
    9460              :         {
    9461            0 :           signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
    9462            0 :           bias = build_int_cst (intQI_type_node, biasval);
    9463              :         }
    9464              : 
    9465              :       /* Arguments are ready.  Create the new vector stmt.  */
    9466       670460 :       if (final_len)
    9467              :         {
    9468            0 :           gcall *call;
    9469            0 :           tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
    9470              :           /* Need conversion if it's wrapped with VnQI.  */
    9471            0 :           if (vmode != new_vmode)
    9472              :             {
    9473            0 :               tree new_vtype
    9474            0 :                 = build_vector_type_for_mode (unsigned_intQI_type_node,
    9475              :                                               new_vmode);
    9476            0 :               tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
    9477            0 :               vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
    9478            0 :               gassign *new_stmt
    9479            0 :                 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
    9480            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    9481            0 :               vec_oprnd = var;
    9482              :             }
    9483              : 
    9484            0 :           if (partial_ifn == IFN_MASK_LEN_STORE)
    9485            0 :             call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
    9486              :                                                dataref_ptr, ptr, final_mask,
    9487              :                                                final_len, bias, vec_oprnd);
    9488              :           else
    9489            0 :             call = gimple_build_call_internal (IFN_LEN_STORE, 5,
    9490              :                                                dataref_ptr, ptr, final_len,
    9491              :                                                bias, vec_oprnd);
    9492            0 :           gimple_call_set_nothrow (call, true);
    9493            0 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    9494            0 :           new_stmt = call;
    9495              :         }
    9496       670460 :       else if (final_mask)
    9497              :         {
    9498          707 :           tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
    9499          707 :           gcall *call
    9500          707 :             = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
    9501              :                                           ptr, final_mask, vec_oprnd);
    9502          707 :           gimple_call_set_nothrow (call, true);
    9503          707 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
    9504          707 :           new_stmt = call;
    9505              :         }
    9506              :       else
    9507              :         {
    9508       669753 :           data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
    9509              :                                   dataref_offset ? dataref_offset
    9510              :                                   : build_int_cst (ref_type, 0));
    9511       669753 :           if (alignment_support_scheme == dr_aligned
    9512       669753 :               && align >= TYPE_ALIGN_UNIT (vectype))
    9513              :             ;
    9514              :           else
    9515       307831 :             TREE_TYPE (data_ref)
    9516       615662 :               = build_aligned_type (TREE_TYPE (data_ref),
    9517              :                                     align * BITS_PER_UNIT);
    9518       669753 :           vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
    9519       669753 :           new_stmt = gimple_build_assign (data_ref, vec_oprnd);
    9520       669753 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
    9521              :         }
    9522              :     }
    9523              : 
    9524      1328568 :   if (costing_p)
    9525              :     {
    9526       787235 :       if (n_adjacent_stores > 0)
    9527       787235 :         vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
    9528              :                              alignment_support_scheme, misalignment,
    9529              :                              &inside_cost, cost_vec);
    9530              : 
    9531              :       /* When vectorizing a store into the function result assign
    9532              :          a penalty if the function returns in a multi-register location.
    9533              :          In this case we assume we'll end up with having to spill the
    9534              :          vector result and do piecewise loads as a conservative estimate.  */
    9535       787235 :       tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
    9536       787235 :       if (base
    9537       787235 :           && (TREE_CODE (base) == RESULT_DECL
    9538       736903 :               || (DECL_P (base) && cfun_returns (base)))
    9539       849239 :           && !aggregate_value_p (base, cfun->decl))
    9540              :         {
    9541        11073 :           rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
    9542              :           /* ???  Handle PARALLEL in some way.  */
    9543        11073 :           if (REG_P (reg))
    9544              :             {
    9545        10869 :               int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
    9546              :               /* Assume that a single reg-reg move is possible and cheap,
    9547              :                  do not account for vector to gp register move cost.  */
    9548        10869 :               if (nregs > 1)
    9549              :                 {
    9550              :                   /* Spill.  */
    9551        10038 :                   prologue_cost
    9552        10038 :                     += record_stmt_cost (cost_vec, 1, vector_store,
    9553              :                                          slp_node, 0, vect_epilogue);
    9554              :                   /* Loads.  */
    9555        10038 :                   prologue_cost
    9556        10038 :                     += record_stmt_cost (cost_vec, nregs, scalar_load,
    9557              :                                          slp_node, 0, vect_epilogue);
    9558              :                 }
    9559              :             }
    9560              :         }
    9561       787235 :       if (dump_enabled_p ())
    9562        13849 :         dump_printf_loc (MSG_NOTE, vect_location,
    9563              :                          "vect_model_store_cost: inside_cost = %d, "
    9564              :                          "prologue_cost = %d .\n",
    9565              :                          inside_cost, prologue_cost);
    9566              : 
    9567       787235 :       SLP_TREE_TYPE (slp_node) = store_vec_info_type;
    9568       787235 :       slp_node->data = new vect_load_store_data (std::move (ls));
    9569              :     }
    9570              : 
    9571      1328568 :   return true;
    9572      2689641 : }
    9573              : 
    9574              : /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
    9575              :    VECTOR_CST mask and return it.  SEL must have exactly as many elements as
    9576              :    VECTYPE (this is asserted).  No check is made that the target supports the
    9577              :    mask; callers may test can_vec_perm_const_p or use vect_gen_perm_mask_checked.  */
    9578              : 
    9579              : tree
    9580        61841 : vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
    9581              : {
    9582        61841 :   tree mask_type;
    9583              : 
    9584        61841 :   poly_uint64 nunits = sel.length ();
    9585        61841 :   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
    9586              : 
    9587        61841 :   mask_type = build_vector_type (ssizetype, nunits);  /* Mask elements are ssizetype.  */
    9588        61841 :   return vec_perm_indices_to_tree (mask_type, sel);
    9589              : }
    9590              : 
    9591              : /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p with
    9592              :    VECTYPE's mode, i.e. that the target supports the pattern _for arbitrary input vectors_.  */
    9593              : 
    9594              : tree
    9595        58978 : vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
    9596              : {
    9597        58978 :   machine_mode vmode = TYPE_MODE (vectype);
    9598        58978 :   gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
    9599        58978 :   return vect_gen_perm_mask_any (vectype, sel);
    9600              : }
    9601              : 
    9602              : /* Given vector variables X and Y that were generated for the scalar
    9603              :    STMT_INFO, generate a VEC_PERM_EXPR permuting the vector elements of X and Y
    9604              :    using permutation mask MASK_VEC, insert it at *GSI and return the SSA name
    9605              :    holding the permuted vector.  The result has the same vector type as X.  */
    9606              : 
    9607              : static tree
    9608         1445 : permute_vec_elements (vec_info *vinfo,
    9609              :                       tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
    9610              :                       gimple_stmt_iterator *gsi)
    9611              : {
    9612         1445 :   tree vectype = TREE_TYPE (x);
    9613         1445 :   tree perm_dest, data_ref;
    9614         1445 :   gimple *perm_stmt;
    9615              : 
    9616         1445 :   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);  /* May be NULL or a non-SSA lhs.  */
    9617         1445 :   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    9618         1445 :     perm_dest = vect_create_destination_var (scalar_dest, vectype);
    9619              :   else
    9620            0 :     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
    9621         1445 :   data_ref = make_ssa_name (perm_dest);
    9622              : 
    9623              :   /* Generate the permute statement.  */
    9624         1445 :   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
    9625         1445 :   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    9626              : 
    9627         1445 :   return data_ref;
    9628              : }
    9629              : 
    9630              : /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP by
    9631              :    inserting copies of them immediately before STMT (the transform-time caller
    9632              :    has already put STMT on the loop's preheader edge).  Returns false, changing
    9633              :    nothing, if an in-loop definition is a PHI, has no single SSA def or uses an in-loop
    9634              :    name; otherwise returns true.  If HOIST_P is false (costing) only this check is done.  */
    9635              : 
    9636              : static bool
    9637         4024 : hoist_defs_of_uses (gimple *stmt, class loop *loop, bool hoist_p)
    9638              : {
    9639         4024 :   ssa_op_iter i;
    9640         4024 :   use_operand_p use_p;
    9641         4024 :   auto_vec<use_operand_p, 8> to_hoist;
    9642              : 
    9643         7645 :   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_USE)
    9644              :     {
    9645         3649 :       gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
    9646         3649 :       if (!gimple_nop_p (def_stmt)
    9647         3649 :           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
    9648              :         {
    9649              :           /* Make sure we don't need to recurse.  While we could do
    9650              :              so in simple cases when there are more complex use webs
    9651              :              we don't have an easy way to preserve stmt order to fulfil
    9652              :              dependencies within them.  */
    9653          111 :           tree op2;
    9654          111 :           ssa_op_iter i2;
    9655          111 :           if (gimple_code (def_stmt) == GIMPLE_PHI
    9656          111 :               || (single_ssa_def_operand (def_stmt, SSA_OP_DEF)
    9657              :                   == NULL_DEF_OPERAND_P))
    9658           28 :             return false;
    9659          226 :           FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
    9660              :             {
    9661          143 :               gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
    9662          143 :               if (!gimple_nop_p (def_stmt2)
    9663          143 :                   && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
    9664              :                 return false;
    9665              :             }
    9666           83 :           to_hoist.safe_push (use_p);  /* Remember the use; its def is copied below.  */
    9667              :         }
    9668              :     }
    9669              : 
    9670         7992 :   if (to_hoist.is_empty ())  /* No use of STMT is defined inside LOOP.  */
    9671              :     return true;
    9672              : 
    9673           59 :   if (!hoist_p)  /* Analysis only: hoisting is possible, but do not modify anything.  */
    9674              :     return true;
    9675              : 
    9676              :   /* Instead of moving defs we copy them so we can zero their UID to not
    9677              :      confuse dominance queries in the preheader.  */
    9678            9 :   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    9679           36 :   for (use_operand_p use_p : to_hoist)
    9680              :     {
    9681            9 :       gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
    9682            9 :       gimple *copy = gimple_copy (def_stmt);
    9683            9 :       gimple_set_uid (copy, 0);
    9684            9 :       def_operand_p def_p = single_ssa_def_operand (def_stmt, SSA_OP_DEF);
    9685            9 :       tree new_def = duplicate_ssa_name (DEF_FROM_PTR (def_p), copy);
    9686            9 :       update_stmt (copy);
    9687            9 :       def_p = single_ssa_def_operand (copy, SSA_OP_DEF);
    9688            9 :       SET_DEF (def_p, new_def);  /* The copy defines the fresh name ...  */
    9689            9 :       SET_USE (use_p, new_def);  /* ... and STMT is redirected to use it.  */
    9690            9 :       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
    9691              :     }
    9692              : 
    9693              :   return true;
    9694         4024 : }
    9695              : 
    9696              : /* vectorizable_load.
    9697              : 
    9698              :    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
    9699              :    that can be vectorized.
    9700              :    If COST_VEC is passed, calculate costs but don't change anything,
    9701              :    otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
    9702              :    it, and insert it at GSI.
    9703              :    Return true if STMT_INFO is vectorizable in this way.  */
    9704              : 
    9705              : static bool
    9706      2133523 : vectorizable_load (vec_info *vinfo,
    9707              :                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    9708              :                    slp_tree slp_node,
    9709              :                    stmt_vector_for_cost *cost_vec)
    9710              : {
    9711      2133523 :   tree scalar_dest;
    9712      2133523 :   tree vec_dest = NULL;
    9713      2133523 :   tree data_ref = NULL;
    9714      2133523 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    9715      2133523 :   class loop *loop = NULL;
    9716      2133523 :   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
    9717      2133523 :   bool nested_in_vect_loop = false;
    9718      2133523 :   tree elem_type;
    9719              :   /* Avoid false positive uninitialized warning, see PR110652.  */
    9720      2133523 :   tree new_temp = NULL_TREE;
    9721      2133523 :   machine_mode mode;
    9722      2133523 :   tree dummy;
    9723      2133523 :   tree dataref_ptr = NULL_TREE;
    9724      2133523 :   tree dataref_offset = NULL_TREE;
    9725      2133523 :   gimple *ptr_incr = NULL;
    9726      2133523 :   int i, j;
    9727      2133523 :   unsigned int group_size;
    9728      2133523 :   poly_uint64 group_gap_adj;
    9729      2133523 :   tree msq = NULL_TREE, lsq;
    9730      2133523 :   tree realignment_token = NULL_TREE;
    9731      2133523 :   gphi *phi = NULL;
    9732      2133523 :   bool grouped_load = false;
    9733      2133523 :   stmt_vec_info first_stmt_info;
    9734      2133523 :   stmt_vec_info first_stmt_info_for_drptr = NULL;
    9735      2133523 :   bool compute_in_loop = false;
    9736      2133523 :   class loop *at_loop;
    9737      2133523 :   int vec_num;
    9738      2133523 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    9739      2133523 :   poly_uint64 vf;
    9740      2133523 :   tree aggr_type;
    9741      2133523 :   tree ref_type;
    9742      2133523 :   enum vect_def_type mask_dt = vect_unknown_def_type;
    9743      2133523 :   enum vect_def_type els_dt = vect_unknown_def_type;
    9744              : 
    9745      2133523 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    9746              :     return false;
    9747              : 
    9748      2133523 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
    9749       234683 :       && cost_vec)
    9750              :     return false;
    9751              : 
    9752      1898840 :   if (!STMT_VINFO_DATA_REF (stmt_info))
    9753              :     return false;
    9754              : 
    9755      1523624 :   tree mask_vectype = NULL_TREE;
    9756      1523624 :   tree els = NULL_TREE; tree els_vectype = NULL_TREE;
    9757              : 
    9758      1523624 :   int mask_index = -1;
    9759      1523624 :   int els_index = -1;
    9760      1523624 :   slp_tree mask_node = NULL;
    9761      1523624 :   slp_tree els_op = NULL;
    9762      1523624 :   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    9763              :     {
    9764      1519173 :       scalar_dest = gimple_assign_lhs (assign);
    9765      1519173 :       if (TREE_CODE (scalar_dest) != SSA_NAME)
    9766              :         return false;
    9767              : 
    9768       705145 :       tree_code code = gimple_assign_rhs_code (assign);
    9769       705145 :       if (code != ARRAY_REF
    9770       705145 :           && code != BIT_FIELD_REF
    9771       705145 :           && code != INDIRECT_REF
    9772       486656 :           && code != COMPONENT_REF
    9773       486656 :           && code != IMAGPART_EXPR
    9774       351053 :           && code != REALPART_EXPR
    9775       351053 :           && code != MEM_REF
    9776          285 :           && TREE_CODE_CLASS (code) != tcc_declaration)
    9777              :         return false;
    9778              :     }
    9779              :   else
    9780              :     {
    9781      1430128 :       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    9782         4451 :       if (!call || !gimple_call_internal_p (call))
    9783              :         return false;
    9784              : 
    9785         4451 :       internal_fn ifn = gimple_call_internal_fn (call);
    9786         4451 :       if (!internal_load_fn_p (ifn))
    9787              :         return false;
    9788              : 
    9789         3096 :       scalar_dest = gimple_call_lhs (call);
    9790         3096 :       if (!scalar_dest)
    9791              :         return false;
    9792              : 
    9793         3096 :       mask_index = internal_fn_mask_index (ifn);
    9794         3096 :       if (mask_index >= 0)
    9795         3096 :         mask_index = vect_slp_child_index_for_operand (stmt_info, mask_index);
    9796         3096 :       if (mask_index >= 0
    9797         3096 :           && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
    9798              :                                       &mask_node, &mask_dt, &mask_vectype))
    9799              :         return false;
    9800              : 
    9801         3096 :       els_index = internal_fn_else_index (ifn);
    9802         3096 :       if (els_index >= 0)
    9803         3096 :         els_index = vect_slp_child_index_for_operand (stmt_info, els_index);
    9804         3096 :       if (els_index >= 0
    9805         3096 :           && !vect_is_simple_use (vinfo, slp_node, els_index,
    9806              :                                   &els, &els_op, &els_dt, &els_vectype))
    9807              :         return false;
    9808              :     }
    9809              : 
    9810       708174 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
    9811       708174 :   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
    9812              : 
    9813       708174 :   if (loop_vinfo)
    9814              :     {
    9815       494822 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    9816       494822 :       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    9817       494822 :       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    9818              :     }
    9819              :   else
    9820              :     vf = 1;
    9821              : 
    9822       708174 :   vec_num = vect_get_num_copies (vinfo, slp_node);
    9823              : 
    9824              :   /* FORNOW. This restriction should be relaxed.  */
    9825       708174 :   if (nested_in_vect_loop && vec_num > 1)
    9826              :     {
    9827          316 :       if (dump_enabled_p ())
    9828           66 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9829              :                          "multiple types in nested loop.\n");
    9830          316 :       return false;
    9831              :     }
    9832              : 
    9833       707858 :   elem_type = TREE_TYPE (vectype);
    9834       707858 :   mode = TYPE_MODE (vectype);
    9835              : 
    9836              :   /* FORNOW. In some cases can vectorize even if data-type not supported
    9837              :     (e.g. - data copies).  */
    9838       707858 :   if (!can_implement_p (mov_optab, mode))
    9839              :     {
    9840            0 :       if (dump_enabled_p ())
    9841            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9842              :                          "Aligned load, but unsupported type.\n");
    9843            0 :       return false;
    9844              :     }
    9845              : 
    9846              :   /* Check if the load is a part of an interleaving chain.  */
    9847       707858 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    9848              :     {
    9849       307918 :       grouped_load = true;
    9850              :       /* FORNOW */
    9851       307918 :       gcc_assert (!nested_in_vect_loop);
    9852       307918 :       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
    9853              : 
    9854       307918 :       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    9855       307918 :       group_size = DR_GROUP_SIZE (first_stmt_info);
    9856              : 
    9857              :       /* Invalidate assumptions made by dependence analysis when vectorization
    9858              :          on the unrolled body effectively re-orders stmts.  */
    9859       307918 :       if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
    9860       307918 :           && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
    9861              :                        STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    9862              :         {
    9863           12 :           if (dump_enabled_p ())
    9864           12 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9865              :                              "cannot perform implicit CSE when performing "
    9866              :                              "group loads with negative dependence distance\n");
    9867           12 :           return false;
    9868              :         }
    9869              :     }
    9870              :   else
    9871              :     group_size = 1;
    9872              : 
    9873       707846 :   vect_load_store_data _ls_data{};
    9874       707846 :   vect_load_store_data &ls = slp_node->get_data (_ls_data);
    9875       707846 :   if (cost_vec
    9876       707846 :       && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
    9877              :                                VLS_LOAD, &ls))
    9878              :     return false;
    9879              :   /* Temporary aliases to analysis data, should not be modified through
    9880              :      these.  */
    9881       602405 :   const vect_memory_access_type memory_access_type = ls.memory_access_type;
    9882       602405 :   const dr_alignment_support alignment_support_scheme
    9883              :     = ls.alignment_support_scheme;
    9884       602405 :   const int misalignment = ls.misalignment;
    9885       602405 :   const poly_int64 poffset = ls.poffset;
    9886       602405 :   const vec<int> &elsvals = ls.elsvals;
    9887              : 
    9888       602405 :   int maskload_elsval = 0;
    9889       602405 :   bool need_zeroing = false;
    9890              : 
    9891              :   /* We might need to explicitly zero inactive elements if there are
    9892              :      padding bits in the type that might leak otherwise.
    9893              :      Refer to PR115336.  */
    9894       602405 :   tree scalar_type = TREE_TYPE (scalar_dest);
    9895       602405 :   bool type_mode_padding_p
    9896      1204810 :     = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
    9897              : 
    9898       602405 :   if (slp_node->ldst_lanes
    9899            0 :       && memory_access_type != VMAT_LOAD_STORE_LANES)
    9900              :     {
    9901            0 :       if (dump_enabled_p ())
    9902            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9903              :                          "discovered load-lane but cannot use it.\n");
    9904            0 :       return false;
    9905              :     }
    9906              : 
    9907       602405 :   if (mask_node)
    9908              :     {
    9909         2966 :       if (memory_access_type == VMAT_CONTIGUOUS)
    9910              :         {
    9911         2100 :           machine_mode vec_mode = TYPE_MODE (vectype);
    9912          721 :           if (!VECTOR_MODE_P (vec_mode)
    9913         4200 :               || !can_vec_mask_load_store_p (vec_mode,
    9914         2100 :                                              TYPE_MODE (mask_vectype),
    9915              :                                              true, NULL, &ls.elsvals))
    9916          351 :             return false;
    9917              :         }
    9918          866 :       else if (memory_access_type == VMAT_ELEMENTWISE
    9919          866 :                || memory_access_type == VMAT_STRIDED_SLP)
    9920              :         {
    9921            0 :           if (dump_enabled_p ())
    9922            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9923              :                              "unsupported masked strided access.\n");
    9924            0 :           return false;
    9925              :         }
    9926          866 :       else if (memory_access_type != VMAT_LOAD_STORE_LANES
    9927          866 :                && !mat_gather_scatter_p (memory_access_type))
    9928              :         {
    9929           62 :           if (dump_enabled_p ())
    9930            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9931              :                              "unsupported access type for masked load.\n");
    9932           62 :           return false;
    9933              :         }
    9934          804 :       else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
    9935              :         {
    9936          482 :           if (dump_enabled_p ())
    9937           28 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9938              :                              "unsupported masked emulated gather.\n");
    9939          482 :           return false;
    9940              :         }
    9941              :     }
    9942              : 
    9943       601510 :   bool costing_p = cost_vec;
    9944              : 
    9945       601510 :   if (costing_p) /* transformation not required.  */
    9946              :     {
    9947       434833 :       if (loop_vinfo
    9948       313803 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
    9949       212288 :         check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
    9950              :                                               VLS_LOAD, group_size, &ls,
    9951              :                                               mask_node, &ls.elsvals);
    9952              : 
    9953              :       /* If the type needs padding we must zero inactive elements.
    9954              :          Check if we can do that with a VEC_COND_EXPR and store the
    9955              :          elsval we choose in MASKLOAD_ELSVAL.  */
    9956       434833 :       if (ls.elsvals.length ()
    9957        60123 :           && type_mode_padding_p
    9958            7 :           && !ls.elsvals.contains (MASK_LOAD_ELSE_ZERO)
    9959        60123 :           && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
    9960              :         {
    9961            0 :           if (dump_enabled_p ())
    9962            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9963              :                              "cannot zero inactive elements.\n");
    9964            0 :           return false;
    9965              :         }
    9966              : 
    9967       434833 :       if (mask_node
    9968       434833 :           && !vect_maybe_update_slp_op_vectype (mask_node,
    9969              :                                                 mask_vectype))
    9970              :         {
    9971            0 :           if (dump_enabled_p ())
    9972            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    9973              :                              "incompatible vector types for invariants\n");
    9974            0 :           return false;
    9975              :         }
    9976              : 
    9977       434833 :       if (dump_enabled_p ()
    9978        25467 :           && memory_access_type != VMAT_ELEMENTWISE
    9979        25356 :           && !mat_gather_scatter_p (memory_access_type)
    9980        25041 :           && memory_access_type != VMAT_STRIDED_SLP
    9981        25041 :           && memory_access_type != VMAT_INVARIANT
    9982       458941 :           && alignment_support_scheme != dr_aligned)
    9983         9923 :         dump_printf_loc (MSG_NOTE, vect_location,
    9984              :                          "Vectorizing an unaligned access.\n");
    9985              : 
    9986       434833 :       if (memory_access_type == VMAT_LOAD_STORE_LANES)
    9987            0 :         vinfo->any_known_not_updated_vssa = true;
    9988              :     }
    9989              : 
    9990              :   /* For now just use the first available else value.
    9991              :      get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
    9992              :      select it here if it is supported.  */
    9993       601510 :   if (elsvals.length ())
    9994        83242 :     maskload_elsval = *elsvals.begin ();
    9995              : 
    9996       601510 :   if (dump_enabled_p () && !costing_p)
    9997        16661 :     dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
    9998              : 
    9999              :   /* Transform.  */
   10000              : 
   10001       601510 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
   10002       601510 :   ensure_base_align (dr_info);
   10003              : 
   10004       601510 :   if (memory_access_type == VMAT_INVARIANT)
   10005              :     {
   10006         4124 :       gcc_assert (!grouped_load && !mask_node && !bb_vinfo);
   10007              :       /* If we have versioned for aliasing or the loop doesn't
   10008              :          have any data dependencies that would preclude this,
   10009              :          then we are sure this is a loop invariant load and
   10010              :          thus we can insert it on the preheader edge.
   10011              :          TODO: hoist_defs_of_uses should ideally be computed
   10012              :          once at analysis time, remembered and used in the
   10013              :          transform time.  */
   10014         8248 :       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
   10015         4124 :                       && !nested_in_vect_loop);
   10016              : 
   10017         4124 :       bool uniform_p = true;
   10018        17270 :       for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
   10019              :         {
   10020              :           /* It is unsafe to hoist a conditional load over the conditions that
   10021              :              make it valid.  When early break this means that any invariant load
   10022              :              can't be hoisted unless it's in the loop header or if we know
   10023              :              something else has verified the load is valid to do.  Alignment
   10024              :              peeling would do this since getting through the prologue means the
   10025              :              load was done at least once and so the vector main body is free to
   10026              :              hoist it.  However today GCC will hoist the load above the PFA
   10027              :              loop.  As such that makes it still invalid and so we can't allow it
   10028              :              today.  */
   10029         4898 :           if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
   10030         1052 :               && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (sinfo))
   10031         5918 :               && gimple_bb (STMT_VINFO_STMT (vect_orig_stmt (sinfo)))
   10032         1020 :                   != loop->header)
   10033              :             {
   10034          920 :               if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
   10035          920 :                   && dump_enabled_p ())
   10036            6 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   10037              :                              "not hoisting invariant load due to early break"
   10038              :                              "constraints\n");
   10039          914 :               else if (dump_enabled_p ())
   10040           16 :                dump_printf_loc (MSG_NOTE, vect_location,
   10041              :                              "not hoisting invariant load due to early break"
   10042              :                              "constraints\n");
   10043              :             hoist_p = false;
   10044              :           }
   10045              : 
   10046         3978 :           hoist_p = hoist_p && hoist_defs_of_uses (sinfo->stmt, loop, false);
   10047         4898 :           if (sinfo != SLP_TREE_SCALAR_STMTS (slp_node)[0])
   10048          279 :             uniform_p = false;
   10049              :         }
   10050         4124 :       if (costing_p)
   10051              :         {
   10052         3289 :           if (!uniform_p && (!hoist_p || !vf.is_constant ()))
   10053              :             {
   10054            0 :               if (dump_enabled_p ())
   10055            0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   10056              :                                  "not vectorizing non-uniform invariant "
   10057              :                                  "load\n");
   10058            0 :               return false;
   10059              :             }
   10060         1433 :           enum vect_cost_model_location cost_loc
   10061         3289 :             = hoist_p ? vect_prologue : vect_body;
   10062         3289 :           unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
   10063              :                                                 slp_node, 0, cost_loc);
   10064         3289 :           cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
   10065              :                                     slp_node, 0, cost_loc);
   10066         3289 :           unsigned int prologue_cost = hoist_p ? cost : 0;
   10067         1433 :           unsigned int inside_cost = hoist_p ? 0 : cost;
   10068         3289 :           if (dump_enabled_p ())
   10069          546 :             dump_printf_loc (MSG_NOTE, vect_location,
   10070              :                              "vect_model_load_cost: inside_cost = %d, "
   10071              :                              "prologue_cost = %d .\n",
   10072              :                              inside_cost, prologue_cost);
   10073         3289 :           SLP_TREE_TYPE (slp_node) = load_vec_info_type;
   10074         3289 :           slp_node->data = new vect_load_store_data (std::move (ls));
   10075         3289 :           return true;
   10076              :         }
   10077          835 :       if (hoist_p)
   10078              :         {
   10079              :           /* ???  For non-uniform lanes there could be still duplicates.
   10080              :              We're leaving those to post-vectorizer CSE for the moment.  */
   10081          638 :           auto_vec<tree> scalar_defs (SLP_TREE_LANES (slp_node));
   10082         2055 :           for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
   10083              :             {
   10084          728 :               gassign *stmt = as_a <gassign *> (sinfo->stmt);
   10085          728 :               if (dump_enabled_p ())
   10086          352 :                 dump_printf_loc (MSG_NOTE, vect_location,
   10087              :                                  "hoisting out of the vectorized loop: %G",
   10088              :                                  (gimple *) stmt);
   10089          728 :               scalar_dest = copy_ssa_name (gimple_assign_lhs (stmt));
   10090          728 :               tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
   10091          728 :               edge pe = loop_preheader_edge (loop);
   10092          728 :               gphi *vphi = get_virtual_phi (loop->header);
   10093          728 :               tree vuse;
   10094          728 :               if (vphi)
   10095          722 :                 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
   10096              :               else
   10097            6 :                 vuse = gimple_vuse (gsi_stmt (*gsi));
   10098          728 :               gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
   10099          728 :               gimple_set_vuse (new_stmt, vuse);
   10100          728 :               gsi_insert_on_edge_immediate (pe, new_stmt);
   10101          728 :               hoist_defs_of_uses (new_stmt, loop, true);
   10102          728 :               if (!useless_type_conversion_p (TREE_TYPE (vectype),
   10103          728 :                                               TREE_TYPE (scalar_dest)))
   10104              :                 {
   10105           12 :                   tree tem = make_ssa_name (TREE_TYPE (vectype));
   10106           12 :                   new_stmt = gimple_build_assign (tem,
   10107              :                                                   NOP_EXPR, scalar_dest);
   10108           12 :                   gsi_insert_on_edge_immediate (pe, new_stmt);
   10109           12 :                   scalar_dest = tem;
   10110              :                 }
   10111          728 :               scalar_defs.quick_push (scalar_dest);
   10112          728 :               if (uniform_p)
   10113              :                 break;
   10114              :             }
   10115          638 :           if (!uniform_p)
   10116              :             {
   10117           51 :               unsigned const_nunits
   10118           51 :                 = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
   10119          116 :               for (j = 0; j < (int) vec_num; ++j)
   10120              :                 {
   10121           65 :                   vec<constructor_elt, va_gc> *v = NULL;
   10122           65 :                   vec_safe_reserve (v, const_nunits, true);
   10123          369 :                   for (unsigned i = 0; i < const_nunits; ++i)
   10124              :                     {
   10125          304 :                       unsigned def_idx
   10126          304 :                         = (j * const_nunits + i) % SLP_TREE_LANES (slp_node);
   10127          304 :                       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   10128              :                                               scalar_defs[def_idx]);
   10129              :                     }
   10130           65 :                   scalar_dest = build_constructor (vectype, v);
   10131           65 :                   new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
   10132              :                                                vectype, NULL);
   10133           65 :                   slp_node->push_vec_def (new_temp);
   10134              :                 }
   10135           51 :               return true;
   10136              :             }
   10137          587 :           new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
   10138              :                                        vectype, NULL);
   10139          638 :         }
   10140              :       else
   10141              :         {
   10142          197 :           gcc_assert (uniform_p);
   10143          197 :           gimple_stmt_iterator gsi2 = *gsi;
   10144          197 :           gsi_next (&gsi2);
   10145          197 :           new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
   10146              :                                        vectype, &gsi2);
   10147              :         }
   10148         1642 :       for (j = 0; j < (int) vec_num; ++j)
   10149          858 :         slp_node->push_vec_def (new_temp);
   10150              :       return true;
   10151              :     }
   10152              : 
   10153       597386 :   if (memory_access_type == VMAT_ELEMENTWISE
   10154       597386 :       || memory_access_type == VMAT_STRIDED_SLP)
   10155              :     {
   10156        23519 :       gimple_stmt_iterator incr_gsi;
   10157        23519 :       bool insert_after;
   10158        23519 :       tree offvar = NULL_TREE;
   10159        23519 :       tree ivstep;
   10160        23519 :       tree running_off;
   10161        23519 :       vec<constructor_elt, va_gc> *v = NULL;
   10162        23519 :       tree stride_base, stride_step = NULL_TREE, alias_off;
   10163              :       /* Checked by get_load_store_type.  */
   10164        23519 :       unsigned int const_nunits = nunits.to_constant ();
   10165        23519 :       unsigned HOST_WIDE_INT cst_offset = 0;
   10166        23519 :       tree dr_offset;
   10167        23519 :       unsigned int inside_cost = 0;
   10168              : 
   10169        23519 :       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
   10170        23519 :       gcc_assert (!nested_in_vect_loop);
   10171              : 
   10172        23519 :       if (grouped_load)
   10173              :         {
   10174              :           /* If we elided a consecutive load permutation, don't
   10175              :              use the original first statement (which could be elided)
   10176              :              but the one the load permutation starts with.
   10177              :              This ensures the stride_base below is correct.  */
   10178        10734 :           if (!ls.subchain_p)
   10179        10690 :             first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   10180              :           else
   10181           44 :             first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   10182        10734 :           first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   10183        10734 :           ref_type = get_group_alias_ptr_type (first_stmt_info);
   10184              :         }
   10185              :       else
   10186              :         {
   10187        12785 :           first_stmt_info = stmt_info;
   10188        12785 :           first_dr_info = dr_info;
   10189        12785 :           ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
   10190              :         }
   10191              : 
   10192        23519 :       if (grouped_load)
   10193              :         {
   10194        10734 :           if (memory_access_type == VMAT_STRIDED_SLP)
   10195              :             {
   10196              :               /* If we elided a consecutive load permutation, adjust
   10197              :                  the group size here.  */
   10198         4217 :               if (!ls.subchain_p)
   10199         4173 :                 group_size = DR_GROUP_SIZE (first_stmt_info);
   10200              :               else
   10201           44 :                 group_size = SLP_TREE_LANES (slp_node);
   10202              :             }
   10203              :           else /* VMAT_ELEMENTWISE */
   10204         6517 :             group_size = SLP_TREE_LANES (slp_node);
   10205              :         }
   10206              :       else
   10207              :         group_size = 1;
   10208              : 
   10209        23519 :       if (!costing_p)
   10210              :         {
   10211         3430 :           dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
   10212         3430 :           stride_base = fold_build_pointer_plus (
   10213              :             DR_BASE_ADDRESS (first_dr_info->dr),
   10214              :             size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
   10215              :                         convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
   10216         3430 :           stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
   10217              : 
   10218              :           /* For a load with loop-invariant (but other than power-of-2)
   10219              :              stride (i.e. not a grouped access) like so:
   10220              : 
   10221              :                for (i = 0; i < n; i += stride)
   10222              :                  ... = array[i];
   10223              : 
   10224              :              we generate a new induction variable and new accesses to
   10225              :              form a new vector (or vectors, depending on ncopies):
   10226              : 
   10227              :                for (j = 0; ; j += VF*stride)
   10228              :                  tmp1 = array[j];
   10229              :                  tmp2 = array[j + stride];
   10230              :                  ...
   10231              :                  vectemp = {tmp1, tmp2, ...}
   10232              :              */
   10233              : 
   10234         3430 :           ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
   10235              :                                 build_int_cst (TREE_TYPE (stride_step), vf));
   10236              : 
   10237         3430 :           standard_iv_increment_position (loop, &incr_gsi, &insert_after);
   10238              : 
   10239         3430 :           stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
   10240         3430 :           ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
   10241         3430 :           create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
   10242              :                      loop, &incr_gsi, insert_after,
   10243              :                      &offvar, NULL);
   10244              : 
   10245         3430 :           stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
   10246              :         }
   10247              : 
   10248        23519 :       running_off = offvar;
   10249        23519 :       alias_off = build_int_cst (ref_type, 0);
   10250        23519 :       int nloads = const_nunits;
   10251        23519 :       int lnel = 1;
   10252        23519 :       tree ltype = TREE_TYPE (vectype);
   10253        23519 :       tree lvectype = vectype;
   10254        23519 :       auto_vec<tree> dr_chain;
   10255              :       /* ???  Modify local copies of alignment_support_scheme and
   10256              :          misalignment, but this part of analysis should be done
   10257              :          earlier and remembered, likewise the chosen load mode.  */
   10258        23519 :       const dr_alignment_support tem = alignment_support_scheme;
   10259        23519 :       dr_alignment_support alignment_support_scheme = tem;
   10260        23519 :       const int tem2 = misalignment;
   10261        23519 :       int misalignment = tem2;
   10262        23519 :       if (memory_access_type == VMAT_STRIDED_SLP)
   10263              :         {
   10264        17002 :           HOST_WIDE_INT n = gcd (group_size, const_nunits);
   10265              :           /* Use the target vector type if the group size is a multiple
   10266              :              of it.  */
   10267        17002 :           if (n == const_nunits)
   10268              :             {
   10269         2247 :               int mis_align = dr_misalignment (first_dr_info, vectype);
   10270              :               /* With VF > 1 we advance the DR by step, if that is constant
   10271              :                  and only aligned when performed VF times, DR alignment
   10272              :                  analysis can analyze this as aligned since it assumes
   10273              :                  contiguous accesses.  But that is not how we code generate
   10274              :                  here, so adjust for this.  */
   10275         2247 :               if (maybe_gt (vf, 1u)
   10276         3613 :                   && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
   10277         3401 :                                   DR_TARGET_ALIGNMENT (first_dr_info)))
   10278          212 :                 mis_align = -1;
   10279         2247 :               dr_alignment_support dr_align
   10280         2247 :                 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
   10281              :                                                  mis_align);
   10282         2247 :               if (dr_align == dr_aligned
   10283         2247 :                   || dr_align == dr_unaligned_supported)
   10284              :                 {
   10285        17002 :                   nloads = 1;
   10286        17002 :                   lnel = const_nunits;
   10287        17002 :                   ltype = vectype;
   10288        17002 :                   alignment_support_scheme = dr_align;
   10289        17002 :                   misalignment = mis_align;
   10290              :                 }
   10291              :             }
   10292              :           /* Else use the biggest vector with which we can load the group
   10293              :              without accessing excess elements.  */
   10294        14755 :           else if (n > 1)
   10295              :             {
   10296         1965 :               tree ptype;
   10297         1965 :               tree vtype
   10298         1965 :                 = vector_vector_composition_type (vectype, const_nunits / n,
   10299              :                                                   &ptype);
   10300         1965 :               if (vtype != NULL_TREE)
   10301              :                 {
   10302         1927 :                   dr_alignment_support dr_align;
   10303         1927 :                   int mis_align = 0;
   10304         1927 :                   if (VECTOR_TYPE_P (ptype))
   10305              :                     {
   10306         1005 :                       mis_align = dr_misalignment (first_dr_info, ptype);
   10307         1005 :                       if (maybe_gt (vf, 1u)
   10308         1980 :                           && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
   10309         1011 :                                           DR_TARGET_ALIGNMENT (first_dr_info)))
   10310          969 :                         mis_align = -1;
   10311         1005 :                       dr_align
   10312         1005 :                         = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
   10313              :                                                          mis_align);
   10314              :                     }
   10315              :                   else
   10316              :                     dr_align = dr_unaligned_supported;
   10317         1927 :                   if (dr_align == dr_aligned
   10318         1927 :                       || dr_align == dr_unaligned_supported)
   10319              :                     {
   10320         1927 :                       nloads = const_nunits / n;
   10321         1927 :                       lnel = n;
   10322         1927 :                       lvectype = vtype;
   10323         1927 :                       ltype = ptype;
   10324         1927 :                       alignment_support_scheme = dr_align;
   10325         1927 :                       misalignment = mis_align;
   10326              :                     }
   10327              :                 }
   10328              :             }
   10329        17002 :           unsigned align;
   10330        17002 :           if (alignment_support_scheme == dr_aligned)
   10331           20 :             align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
   10332              :           else
   10333        16982 :             align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
   10334              :           /* Alignment is at most the access size if we do multiple loads.  */
   10335        17002 :           if (nloads > 1)
   10336        14755 :             align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
   10337        17002 :           ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
   10338              :         }
   10339              : 
   10340        23519 :       if (costing_p)
   10341              :         {
   10342              :           /* Record the composition type for target access during costing.  */
   10343        20089 :           ls.ls_type = lvectype;
   10344        20089 :           ls.ls_eltype = ltype;
   10345              :         }
   10346              :       else
   10347         3430 :         gcc_assert (ls.ls_type == lvectype && ls.ls_eltype == ltype);
   10348              : 
   10349              :       /* For SLP permutation support we need to load the whole group,
   10350              :          not only the number of vector stmts the permutation result
   10351              :          fits in.  */
   10352        23519 :       int ncopies;
   10353        23519 :       if (ls.slp_perm)
   10354              :         {
   10355         2869 :           gcc_assert (memory_access_type != VMAT_ELEMENTWISE);
   10356              :           /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
   10357              :              variable VF.  */
   10358         2869 :           unsigned int const_vf = vf.to_constant ();
   10359         2869 :           ncopies = CEIL (group_size * const_vf, const_nunits);
   10360         2869 :           dr_chain.create (ncopies);
   10361              :         }
   10362              :       else
   10363              :         ncopies = vec_num;
   10364              : 
   10365        23519 :       unsigned int group_el = 0;
   10366        23519 :       unsigned HOST_WIDE_INT
   10367        23519 :         elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
   10368        23519 :       unsigned int n_groups = 0;
   10369              :       /* For costing some adjacent vector loads, we'd like to cost with
   10370              :          the total number of them once instead of costing each one by one.  */
   10371        23519 :       unsigned int n_adjacent_loads = 0;
   10372        56276 :       for (j = 0; j < ncopies; j++)
   10373              :         {
   10374        32757 :           if (nloads > 1 && !costing_p)
   10375         3145 :             vec_alloc (v, nloads);
   10376              :           gimple *new_stmt = NULL;
   10377       137922 :           for (i = 0; i < nloads; i++)
   10378              :             {
   10379       105165 :               if (costing_p)
   10380              :                 {
   10381              :                   /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
   10382              :                      avoid ICE, see PR110776.  */
   10383        95086 :                   if (VECTOR_TYPE_P (ltype)
   10384         5822 :                       && memory_access_type != VMAT_ELEMENTWISE)
   10385         5822 :                     n_adjacent_loads++;
   10386              :                   else
   10387        89264 :                     inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
   10388              :                                                      slp_node, 0, vect_body);
   10389        95086 :                   continue;
   10390              :                 }
   10391        10079 :               unsigned int load_el = group_el;
   10392              :               /* For elementwise accesses apply a load permutation directly.  */
   10393        10079 :               if (memory_access_type == VMAT_ELEMENTWISE
   10394        10079 :                   && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   10395         2014 :                 load_el = SLP_TREE_LOAD_PERMUTATION (slp_node)[group_el];
   10396        10079 :               tree this_off = build_int_cst (TREE_TYPE (alias_off),
   10397        10079 :                                              load_el * elsz + cst_offset);
   10398        10079 :               tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
   10399        10079 :               vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   10400        10079 :               new_temp = make_ssa_name (ltype);
   10401        10079 :               new_stmt = gimple_build_assign (new_temp, data_ref);
   10402        10079 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   10403        10079 :               if (nloads > 1)
   10404         8472 :                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
   10405              : 
   10406        10079 :               group_el += lnel;
   10407        10079 :               if (group_el == group_size)
   10408              :                 {
   10409         9732 :                   n_groups++;
   10410              :                   /* When doing SLP make sure to not load elements from
   10411              :                      the next vector iteration, those will not be accessed
   10412              :                      so just use the last element again.  See PR107451.  */
   10413         9732 :                   if (known_lt (n_groups, vf))
   10414              :                     {
   10415         6282 :                       tree newoff = copy_ssa_name (running_off);
   10416         6282 :                       gimple *incr
   10417         6282 :                         = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
   10418              :                                                running_off, stride_step);
   10419         6282 :                       vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
   10420         6282 :                       running_off = newoff;
   10421              :                     }
   10422              :                   group_el = 0;
   10423              :                 }
   10424              :             }
   10425              : 
   10426        32757 :           if (nloads > 1)
   10427              :             {
   10428        24020 :               if (costing_p)
   10429        20875 :                 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
   10430              :                                                  slp_node, 0, vect_body);
   10431              :               else
   10432              :                 {
   10433         3145 :                   tree vec_inv = build_constructor (lvectype, v);
   10434         3145 :                   new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
   10435              :                                                lvectype, gsi);
   10436         3145 :                   new_stmt = SSA_NAME_DEF_STMT (new_temp);
   10437         3145 :                   if (lvectype != vectype)
   10438              :                     {
   10439          398 :                       new_stmt
   10440          398 :                         = gimple_build_assign (make_ssa_name (vectype),
   10441              :                                                VIEW_CONVERT_EXPR,
   10442              :                                                build1 (VIEW_CONVERT_EXPR,
   10443              :                                                        vectype, new_temp));
   10444          398 :                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
   10445              :                                                    gsi);
   10446              :                     }
   10447              :                 }
   10448              :             }
   10449         8737 :           else if (!costing_p && ltype != vectype)
   10450              :             {
   10451         1588 :               new_stmt = gimple_build_assign (make_ssa_name (vectype),
   10452              :                                               VIEW_CONVERT_EXPR,
   10453              :                                               build1 (VIEW_CONVERT_EXPR,
   10454              :                                                       vectype, new_temp));
   10455         1588 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
   10456              :                                            gsi);
   10457              :             }
   10458              : 
   10459        32757 :           if (!costing_p)
   10460              :             {
   10461         4752 :               if (ls.slp_perm)
   10462         1682 :                 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
   10463              :               else
   10464         3070 :                 slp_node->push_vec_def (new_stmt);
   10465              :             }
   10466              :         }
   10467        23519 :       if (ls.slp_perm)
   10468              :         {
   10469         2869 :           if (costing_p)
   10470              :             {
   10471         2076 :               gcc_assert (ls.n_perms != -1U);
   10472         2076 :               inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
   10473              :                                                slp_node, 0, vect_body);
   10474              :             }
   10475              :           else
   10476              :             {
   10477          793 :               unsigned n_perms2;
   10478          793 :               vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
   10479              :                                             false, &n_perms2);
   10480          793 :               gcc_assert (ls.n_perms == n_perms2);
   10481              :             }
   10482              :         }
   10483              : 
   10484        23519 :       if (costing_p)
   10485              :         {
   10486        20089 :           if (n_adjacent_loads > 0)
   10487         2152 :             vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
   10488              :                                 alignment_support_scheme, misalignment, false,
   10489              :                                 &inside_cost, nullptr, cost_vec, cost_vec,
   10490              :                                 true);
   10491        20089 :           if (dump_enabled_p ())
   10492          498 :             dump_printf_loc (MSG_NOTE, vect_location,
   10493              :                              "vect_model_load_cost: inside_cost = %u, "
   10494              :                              "prologue_cost = 0 .\n",
   10495              :                              inside_cost);
   10496        20089 :           SLP_TREE_TYPE (slp_node) = load_vec_info_type;
   10497        20089 :           slp_node->data = new vect_load_store_data (std::move (ls));
   10498              :         }
   10499              : 
   10500        23519 :       return true;
   10501        23519 :     }
   10502              : 
   10503       573867 :   if (mat_gather_scatter_p (memory_access_type)
   10504       573867 :       && !ls.ls_type)
   10505              :     grouped_load = false;
   10506              : 
   10507       570897 :   if (grouped_load
   10508       573867 :       || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   10509              :     {
   10510       261625 :       if (grouped_load)
   10511              :         {
   10512       261183 :           first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
   10513       261183 :           group_size = DR_GROUP_SIZE (first_stmt_info);
   10514              :         }
   10515              :       else
   10516              :         {
   10517              :           first_stmt_info = stmt_info;
   10518              :           group_size = 1;
   10519              :         }
   10520              :       /* For SLP vectorization we directly vectorize a subchain
   10521              :          without permutation.  */
   10522       261625 :       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
   10523       208631 :         first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
   10524              :       /* For BB vectorization always use the first stmt to base
   10525              :          the data ref pointer on.  */
   10526       261625 :       if (bb_vinfo)
   10527       207027 :         first_stmt_info_for_drptr
   10528       207027 :           = vect_find_first_scalar_stmt_in_slp (slp_node);
   10529              : 
   10530       261625 :       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   10531       261625 :       group_gap_adj = 0;
   10532              : 
   10533              :       /* VEC_NUM is the number of vect stmts to be created for this group.  */
   10534       261625 :       grouped_load = false;
   10535              :       /* If an SLP permutation is from N elements to N elements,
   10536              :          and if one vector holds a whole number of N, we can load
   10537              :          the inputs to the permutation in the same way as an
   10538              :          unpermuted sequence.  In other cases we need to load the
   10539              :          whole group, not only the number of vector stmts the
   10540              :          permutation result fits in.  */
   10541       261625 :       unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
   10542       261625 :       if (nested_in_vect_loop)
   10543              :         /* We do not support grouped accesses in a nested loop,
   10544              :            instead the access is contiguous but it might be
   10545              :            permuted.  No gap adjustment is needed though.  */
   10546              :         ;
   10547       261623 :       else if (ls.slp_perm
   10548       261623 :                && (group_size != scalar_lanes
   10549        11204 :                    || !multiple_p (nunits, group_size)))
   10550              :         {
   10551              :           /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
   10552              :              variable VF; see vect_transform_slp_perm_load.  */
   10553        42966 :           unsigned int const_vf = vf.to_constant ();
   10554        42966 :           unsigned int const_nunits = nunits.to_constant ();
   10555        42966 :           vec_num = CEIL (group_size * const_vf, const_nunits);
   10556        42966 :           group_gap_adj = vf * group_size - nunits * vec_num;
   10557              :         }
   10558              :       else
   10559              :         {
   10560       218657 :           group_gap_adj = group_size - scalar_lanes;
   10561              :         }
   10562              : 
   10563       261625 :       ref_type = get_group_alias_ptr_type (first_stmt_info);
   10564              :     }
   10565              :   else
   10566              :     {
   10567       312242 :       first_stmt_info = stmt_info;
   10568       312242 :       first_dr_info = dr_info;
   10569       312242 :       group_size = 1;
   10570       312242 :       group_gap_adj = 0;
   10571       312242 :       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
   10572              :     }
   10573              : 
   10574       573867 :   vec_loop_masks *loop_masks
   10575       366840 :     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
   10576       573867 :        ? &LOOP_VINFO_MASKS (loop_vinfo)
   10577           31 :        : NULL);
   10578           31 :   vec_loop_lens *loop_lens
   10579       366840 :     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
   10580              :        ? &LOOP_VINFO_LENS (loop_vinfo)
   10581            0 :        : NULL);
   10582              : 
   10583              :   /* Both vect_transform_stmt and vect_analyze_stmt reach this point, but
   10584              :      they differ: lens and masks cannot both be enabled during transform,
   10585              :      although that is allowed during analysis.
   10586              :      We shouldn't use the length-based approach if fully masked.  */
   10587       573867 :   if (cost_vec == NULL)
   10588              :     /* The cost_vec is NULL during transform.  */
   10589       162412 :     gcc_assert ((!loop_lens || !loop_masks));
   10590              : 
   10591              :   /* Targets with store-lane instructions must not require explicit
   10592              :      realignment.  vect_supportable_dr_alignment always returns either
   10593              :      dr_aligned or dr_unaligned_supported for (non-length) masked
   10594              :      operations.  */
   10595       573867 :   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
   10596              :                && !mask_node
   10597              :                && !loop_masks)
   10598              :               || mat_gather_scatter_p (memory_access_type)
   10599              :               || alignment_support_scheme == dr_aligned
   10600              :               || alignment_support_scheme == dr_unaligned_supported);
   10601              : 
   10602              :   /* In case the vectorization factor (VF) is bigger than the number
   10603              :      of elements that we can fit in a vectype (nunits), we have to generate
   10604              :      more than one vector stmt - i.e - we need to "unroll" the
   10605              :      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
   10606              :      from one copy of the vector stmt to the next, in the field
   10607              :      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
   10608              :      stages to find the correct vector defs to be used when vectorizing
   10609              :      stmts that use the defs of the current stmt.  The example below
   10610              :      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
   10611              :      need to create 4 vectorized stmts):
   10612              : 
   10613              :      before vectorization:
   10614              :                                 RELATED_STMT    VEC_STMT
   10615              :         S1:     x = memref      -               -
   10616              :         S2:     z = x + 1       -               -
   10617              : 
   10618              :      step 1: vectorize stmt S1:
   10619              :         We first create the vector stmt VS1_0, and, as usual, record a
   10620              :         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
   10621              :         Next, we create the vector stmt VS1_1, and record a pointer to
   10622              :         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
   10623              :         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
   10624              :         stmts and pointers:
   10625              :                                 RELATED_STMT    VEC_STMT
   10626              :         VS1_0:  vx0 = memref0   VS1_1           -
   10627              :         VS1_1:  vx1 = memref1   VS1_2           -
   10628              :         VS1_2:  vx2 = memref2   VS1_3           -
   10629              :         VS1_3:  vx3 = memref3   -               -
   10630              :         S1:     x = load        -               VS1_0
   10631              :         S2:     z = x + 1       -               -
   10632              :   */
   10633              : 
   10634              :   /* If the data reference is aligned (dr_aligned) or potentially unaligned
   10635              :      on a target that supports unaligned accesses (dr_unaligned_supported)
   10636              :      we generate the following code:
   10637              :          p = initial_addr;
   10638              :          indx = 0;
   10639              :          loop {
   10640              :            p = p + indx * vectype_size;
   10641              :            vec_dest = *(p);
   10642              :            indx = indx + 1;
   10643              :          }
   10644              : 
   10645              :      Otherwise, the data reference is potentially unaligned on a target that
   10646              :      does not support unaligned accesses (dr_explicit_realign_optimized) -
   10647              :      then generate the following code, in which the data in each iteration is
   10648              :      obtained by two vector loads, one from the previous iteration, and one
   10649              :      from the current iteration:
   10650              :          p1 = initial_addr;
   10651              :          msq_init = *(floor(p1))
   10652              :          p2 = initial_addr + VS - 1;
   10653              :          realignment_token = call target_builtin;
   10654              :          indx = 0;
   10655              :          loop {
   10656              :            p2 = p2 + indx * vectype_size
   10657              :            lsq = *(floor(p2))
   10658              :            vec_dest = realign_load (msq, lsq, realignment_token)
   10659              :            indx = indx + 1;
   10660              :            msq = lsq;
   10661              :          }   */
   10662              : 
   10663              :   /* If the misalignment remains the same throughout the execution of the
   10664              :      loop, we can create the init_addr and permutation mask at the loop
   10665              :      preheader.  Otherwise, it needs to be created inside the loop.
   10666              :      This can only occur when vectorizing memory accesses in the inner-loop
   10667              :      nested within an outer-loop that is being vectorized.  */
   10668              : 
   10669       573867 :   if (nested_in_vect_loop
   10670       573867 :       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
   10671         1234 :                       GET_MODE_SIZE (TYPE_MODE (vectype))))
   10672              :     {
   10673          195 :       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
   10674              :       compute_in_loop = true;
   10675              :     }
   10676              : 
   10677       573867 :   bool diff_first_stmt_info
   10678       573867 :     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
   10679              : 
   10680       573867 :   tree offset = NULL_TREE;
   10681       573867 :   if ((alignment_support_scheme == dr_explicit_realign_optimized
   10682       573867 :        || alignment_support_scheme == dr_explicit_realign)
   10683            0 :       && !compute_in_loop)
   10684              :     {
   10685              :       /* If we have different first_stmt_info, we can't set up realignment
   10686              :          here, since we can't guarantee first_stmt_info DR has been
   10687              :          initialized yet, use first_stmt_info_for_drptr DR by bumping the
   10688              :          distance from first_stmt_info DR instead as below.  */
   10689            0 :       if (!costing_p)
   10690              :         {
   10691            0 :           if (!diff_first_stmt_info)
   10692            0 :             msq = vect_setup_realignment (vinfo, first_stmt_info, vectype, gsi,
   10693              :                                           &realignment_token,
   10694              :                                           alignment_support_scheme, NULL_TREE,
   10695              :                                           &at_loop);
   10696            0 :           if (alignment_support_scheme == dr_explicit_realign_optimized)
   10697              :             {
   10698            0 :               phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
   10699            0 :               offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
   10700              :                                    size_one_node);
   10701            0 :               gcc_assert (!first_stmt_info_for_drptr);
   10702              :             }
   10703              :         }
   10704              :     }
   10705              :   else
   10706       573867 :     at_loop = loop;
   10707              : 
   10708       573867 :   if (!known_eq (poffset, 0))
   10709         4626 :     offset = (offset
   10710         4626 :               ? size_binop (PLUS_EXPR, offset, size_int (poffset))
   10711         4626 :               : size_int (poffset));
   10712              : 
   10713       573867 :   tree bump;
   10714       573867 :   tree vec_offset = NULL_TREE;
   10715              : 
   10716       573867 :   auto_vec<tree> vec_offsets;
   10717       573867 :   auto_vec<tree> vec_masks;
   10718       573867 :   if (mask_node && !costing_p)
   10719          636 :     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
   10720              :                        &vec_masks);
   10721              : 
   10722       573867 :   tree vec_mask = NULL_TREE;
   10723       573867 :   tree vec_els = NULL_TREE;
   10724       573867 :   if (memory_access_type == VMAT_LOAD_STORE_LANES)
   10725              :     {
   10726            0 :       const internal_fn lanes_ifn = ls.lanes_ifn;
   10727              : 
   10728            0 :       gcc_assert (alignment_support_scheme == dr_aligned
   10729              :                   || alignment_support_scheme == dr_unaligned_supported);
   10730              : 
   10731            0 :       aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
   10732            0 :       if (!costing_p)
   10733            0 :         bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
   10734              :                                             memory_access_type, loop_lens);
   10735              : 
   10736            0 :       unsigned int inside_cost = 0, prologue_cost = 0;
   10737              :       /* When costing adjacent vector loads, we'd like to cost the total
   10738              :          number of them once instead of costing each one by one.  */
   10739            0 :       unsigned int n_adjacent_loads = 0;
   10740            0 :       int ncopies = vec_num / group_size;
   10741            0 :       for (j = 0; j < ncopies; j++)
   10742              :         {
   10743            0 :           if (costing_p)
   10744              :             {
   10745              :               /* An IFN_LOAD_LANES will load all its vector results,
   10746              :                  regardless of which ones we actually need.  Account
   10747              :                  for the cost of unused results.  */
   10748            0 :               if (first_stmt_info == stmt_info)
   10749              :                 {
   10750            0 :                   unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
   10751            0 :                   stmt_vec_info next_stmt_info = first_stmt_info;
   10752            0 :                   do
   10753              :                     {
   10754            0 :                       gaps -= 1;
   10755            0 :                       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   10756              :                     }
   10757            0 :                   while (next_stmt_info);
   10758            0 :                   if (gaps)
   10759              :                     {
   10760            0 :                       if (dump_enabled_p ())
   10761            0 :                         dump_printf_loc (MSG_NOTE, vect_location,
   10762              :                                          "vect_model_load_cost: %d "
   10763              :                                          "unused vectors.\n",
   10764              :                                          gaps);
   10765            0 :                       vect_get_load_cost (vinfo, stmt_info, slp_node, gaps,
   10766              :                                           alignment_support_scheme,
   10767              :                                           misalignment, false, &inside_cost,
   10768              :                                           &prologue_cost, cost_vec, cost_vec,
   10769              :                                           true);
   10770              :                     }
   10771              :                 }
   10772            0 :               n_adjacent_loads++;
   10773            0 :               continue;
   10774            0 :             }
   10775              : 
   10776              :           /* 1. Create the vector or array pointer update chain.  */
   10777            0 :           if (j == 0)
   10778            0 :             dataref_ptr
   10779            0 :               = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
   10780              :                                           at_loop, offset, &dummy, gsi,
   10781              :                                           &ptr_incr, false, bump);
   10782              :           else
   10783              :             {
   10784            0 :               gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
   10785            0 :               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   10786              :                                              stmt_info, bump);
   10787              :             }
   10788            0 :           if (mask_node)
   10789            0 :             vec_mask = vec_masks[j];
   10790              : 
   10791            0 :           tree vec_array = create_vector_array (vectype, group_size);
   10792              : 
   10793            0 :           tree final_mask = NULL_TREE;
   10794            0 :           tree final_len = NULL_TREE;
   10795            0 :           tree bias = NULL_TREE;
   10796            0 :           if (loop_masks)
   10797            0 :             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
   10798              :                                              ncopies, vectype, j);
   10799            0 :           if (vec_mask)
   10800            0 :             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
   10801              :                                            vec_mask, gsi);
   10802              : 
   10803            0 :           if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
   10804              :             {
   10805            0 :               if (loop_lens)
   10806            0 :                 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
   10807              :                                                ncopies, vectype, j, 1, true);
   10808              :               else
   10809            0 :                 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
   10810            0 :               signed char biasval
   10811            0 :                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   10812            0 :               bias = build_int_cst (intQI_type_node, biasval);
   10813            0 :               if (!final_mask)
   10814              :                 {
   10815            0 :                   mask_vectype = truth_type_for (vectype);
   10816            0 :                   final_mask = build_minus_one_cst (mask_vectype);
   10817              :                 }
   10818              :             }
   10819              : 
   10820            0 :           if (final_mask)
   10821              :             {
   10822            0 :               vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
   10823            0 :               if (type_mode_padding_p
   10824            0 :                   && maskload_elsval != MASK_LOAD_ELSE_ZERO)
   10825            0 :                 need_zeroing = true;
   10826              :             }
   10827              : 
   10828            0 :           gcall *call;
   10829            0 :           if (final_len && final_mask)
   10830              :             {
   10831              :               /* Emit:
   10832              :                    VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
   10833              :                        VEC_MASK, VEC_ELS, LEN, BIAS).  */
   10834            0 :               unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
   10835            0 :               tree alias_ptr = build_int_cst (ref_type, align);
   10836            0 :               call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
   10837              :                                                  dataref_ptr, alias_ptr,
   10838              :                                                  final_mask, vec_els,
   10839              :                                                  final_len, bias);
   10840              :             }
   10841            0 :           else if (final_mask)
   10842              :             {
   10843              :               /* Emit:
   10844              :                    VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
   10845              :                                                 VEC_MASK, VEC_ELS).  */
   10846            0 :               unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
   10847            0 :               tree alias_ptr = build_int_cst (ref_type, align);
   10848            0 :               call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
   10849              :                                                  dataref_ptr, alias_ptr,
   10850              :                                                  final_mask, vec_els);
   10851              :             }
   10852              :           else
   10853              :             {
   10854              :               /* Emit:
   10855              :                    VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
   10856            0 :               data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
   10857            0 :               call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
   10858              :             }
   10859            0 :           gimple_call_set_lhs (call, vec_array);
   10860            0 :           gimple_call_set_nothrow (call, true);
   10861            0 :           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
   10862              : 
   10863              :           /* Extract each vector into an SSA_NAME.  */
   10864            0 :           for (unsigned i = 0; i < group_size; i++)
   10865              :             {
   10866            0 :               new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
   10867              :                                             vec_array, i, need_zeroing,
   10868              :                                             final_mask);
   10869            0 :               slp_node->push_vec_def (new_temp);
   10870              :             }
   10871              : 
   10872              :           /* Record that VEC_ARRAY is now dead.  */
   10873            0 :           vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
   10874              :         }
   10875              : 
   10876            0 :       if (costing_p)
   10877              :         {
   10878            0 :           if (n_adjacent_loads > 0)
   10879            0 :             vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
   10880              :                                 alignment_support_scheme, misalignment, false,
   10881              :                                 &inside_cost, &prologue_cost, cost_vec,
   10882              :                                 cost_vec, true);
   10883            0 :           if (dump_enabled_p ())
   10884            0 :             dump_printf_loc (MSG_NOTE, vect_location,
   10885              :                              "vect_model_load_cost: inside_cost = %u, "
   10886              :                              "prologue_cost = %u .\n",
   10887              :                              inside_cost, prologue_cost);
   10888            0 :           SLP_TREE_TYPE (slp_node) = load_vec_info_type;
   10889            0 :           slp_node->data = new vect_load_store_data (std::move (ls));
   10890              :         }
   10891              : 
   10892            0 :       return true;
   10893              :     }
   10894              : 
   10895       573867 :   if (mat_gather_scatter_p (memory_access_type))
   10896              :     {
   10897         2970 :       gcc_assert ((!grouped_load && !ls.slp_perm) || ls.ls_type);
   10898              : 
   10899         2970 :       auto_vec<tree> dr_chain (vec_num);
   10900              : 
   10901              :       /* If we pun the original vectype, the loads as well as costing, length,
   10902              :          etc. are performed with the new type.  After loading we VIEW_CONVERT
   10903              :          the data to the original vectype.  */
   10904         2970 :       tree original_vectype = vectype;
   10905         2970 :       if (ls.ls_type)
   10906            0 :         vectype = ls.ls_type;
   10907              : 
   10908              :       /* 1. Create the vector or array pointer update chain.  */
   10909         2970 :       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   10910              :         {
   10911         2970 :           aggr_type = NULL_TREE;
   10912         2970 :           bump = NULL_TREE;
   10913         2970 :           if (!costing_p)
   10914          763 :             vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
   10915              :                                          &vec_offsets);
   10916              :         }
   10917              :       else
   10918              :         {
   10919            0 :           aggr_type = elem_type;
   10920            0 :           if (!costing_p)
   10921              :             {
   10922            0 :               vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
   10923              :                                                ls.strided_offset_vectype,
   10924              :                                                loop_vinfo, gsi,
   10925              :                                                &bump, &vec_offset, loop_lens);
   10926            0 :               dataref_ptr
   10927            0 :                   = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
   10928              :                                               at_loop, offset, &dummy, gsi,
   10929              :                                               &ptr_incr, false, bump);
   10930              :             }
   10931              :         }
   10932              : 
   10933              :       unsigned int inside_cost = 0, prologue_cost = 0;
   10934              : 
   10935         6713 :       gimple *new_stmt = NULL;
   10936         6713 :       for (i = 0; i < vec_num; i++)
   10937              :         {
   10938         3743 :           tree final_mask = NULL_TREE;
   10939         3743 :           tree final_len = NULL_TREE;
   10940         3743 :           tree bias = NULL_TREE;
   10941         3743 :           if (!costing_p)
   10942              :             {
   10943          980 :               if (mask_node)
   10944          153 :                 vec_mask = vec_masks[i];
   10945          980 :               if (loop_masks)
   10946            0 :                 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
   10947              :                                                  vec_num, vectype, i);
   10948          980 :               if (vec_mask)
   10949          153 :                 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   10950              :                                                final_mask, vec_mask, gsi);
   10951              : 
   10952          980 :               if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   10953            0 :                 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
   10954              :                                                gsi, stmt_info, bump);
   10955              :             }
   10956              : 
   10957              :           /* 2. Create the vector-load in the loop.  */
   10958         3743 :           unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
   10959         3743 :           tree alias_align_ptr = build_int_cst (ref_type, align);
   10960         3743 :           if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
   10961              :             {
   10962            0 :               if (costing_p)
   10963              :                 {
   10964            0 :                   if (ls.supported_offset_vectype
   10965            0 :                       && !tree_nop_conversion_p (ls.supported_offset_vectype,
   10966              :                                                  vec_offset))
   10967            0 :                     inside_cost
   10968            0 :                       += record_stmt_cost (cost_vec, 1, vector_stmt,
   10969              :                                            slp_node, 0, vect_body);
   10970            0 :                   if (ls.supported_scale)
   10971            0 :                     inside_cost
   10972            0 :                       += record_stmt_cost (cost_vec, 1, vector_stmt,
   10973              :                                            slp_node, 0, vect_body);
   10974              : 
   10975            0 :                   unsigned int cnunits = vect_nunits_for_cost (vectype);
   10976            0 :                   inside_cost
   10977            0 :                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
   10978              :                                         slp_node, 0, vect_body);
   10979         3743 :                   continue;
   10980            0 :                 }
   10981            0 :               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
   10982            0 :                 vec_offset = vec_offsets[i];
   10983            0 :               tree zero = build_zero_cst (vectype);
   10984            0 :               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
   10985            0 :               bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
   10986              : 
   10987              :               /* Perform the offset conversion and scaling if necessary.  */
   10988            0 :               if (!strided
   10989            0 :                   && (ls.supported_offset_vectype || ls.supported_scale))
   10990              :                 {
   10991            0 :                   gimple_seq stmts = NULL;
   10992            0 :                   if (ls.supported_offset_vectype)
   10993            0 :                     vec_offset = gimple_convert
   10994            0 :                       (&stmts, ls.supported_offset_vectype, vec_offset);
   10995            0 :                   if (ls.supported_scale)
   10996              :                     {
   10997              :                       /* Only scale the vec_offset if we haven't already.  */
   10998            0 :                       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
   10999            0 :                           || i == 0)
   11000              :                         {
   11001            0 :                           tree mult_cst = build_int_cst
   11002            0 :                             (TREE_TYPE (TREE_TYPE (vec_offset)),
   11003            0 :                              SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
   11004            0 :                           tree mult = build_vector_from_val
   11005            0 :                             (TREE_TYPE (vec_offset), mult_cst);
   11006            0 :                           vec_offset = gimple_build
   11007            0 :                             (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
   11008              :                              vec_offset, mult);
   11009              :                         }
   11010            0 :                       scale = size_int (ls.supported_scale);
   11011              :                     }
   11012            0 :                   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   11013              :                 }
   11014              : 
   11015            0 :               if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
   11016              :                 {
   11017            0 :                   if (loop_lens)
   11018            0 :                     final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
   11019              :                                                    vec_num, vectype, i, 1, true);
   11020              :                   else
   11021            0 :                     final_len = build_int_cst (sizetype,
   11022            0 :                                                TYPE_VECTOR_SUBPARTS (vectype));
   11023            0 :                   signed char biasval
   11024            0 :                     = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   11025            0 :                   bias = build_int_cst (intQI_type_node, biasval);
   11026            0 :                   if (!final_mask)
   11027              :                     {
   11028            0 :                       mask_vectype = truth_type_for (vectype);
   11029            0 :                       final_mask = build_minus_one_cst (mask_vectype);
   11030              :                     }
   11031              :                 }
   11032              : 
   11033            0 :               if (final_mask)
   11034              :                 {
   11035            0 :                   vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
   11036            0 :                   if (type_mode_padding_p
   11037            0 :                       && maskload_elsval != MASK_LOAD_ELSE_ZERO)
   11038            0 :                     need_zeroing = true;
   11039              :                 }
   11040              : 
   11041            0 :               gcall *call;
   11042            0 :               if (final_len && final_mask)
   11043              :                 {
   11044            0 :                   if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
   11045            0 :                     call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
   11046              :                                                        9, dataref_ptr,
   11047              :                                                        alias_align_ptr,
   11048              :                                                        vec_offset, scale, zero,
   11049              :                                                        final_mask, vec_els,
   11050              :                                                        final_len, bias);
   11051              :                   else
   11052              :                     /* A non-vector offset indicates that we prefer to use
   11053              :                        MASK_LEN_STRIDED_LOAD, with a direct stride argument,
   11054              :                        instead of MASK_LEN_GATHER_LOAD.  */
   11055            0 :                     call = gimple_build_call_internal
   11056            0 :                              (IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr,
   11057              :                               vec_offset, zero, final_mask, vec_els, final_len,
   11058              :                               bias);
   11059              :                 }
   11060            0 :               else if (final_mask)
   11061            0 :                 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
   11062              :                                                    7, dataref_ptr,
   11063              :                                                    alias_align_ptr,
   11064              :                                                    vec_offset, scale,
   11065              :                                                    zero, final_mask, vec_els);
   11066              :               else
   11067            0 :                 call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
   11068              :                                                    dataref_ptr,
   11069              :                                                    alias_align_ptr,
   11070              :                                                    vec_offset, scale, zero);
   11071            0 :               gimple_call_set_nothrow (call, true);
   11072            0 :               new_stmt = call;
   11073            0 :               data_ref = NULL_TREE;
   11074              :             }
   11075         3743 :           else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
   11076              :             {
   11077              :               /* The builtin decls path for gather is legacy, x86 only.  */
   11078          849 :               gcc_assert (!final_len && nunits.is_constant ());
   11079          849 :               if (costing_p)
   11080              :                 {
   11081          566 :                   unsigned int cnunits = vect_nunits_for_cost (vectype);
   11082          566 :                   inside_cost
   11083          566 :                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
   11084              :                                         slp_node, 0, vect_body);
   11085          566 :                   continue;
   11086          566 :                 }
   11087          283 :               tree offset_vectype = TREE_TYPE (vec_offsets[0]);
   11088          283 :               poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
   11089          283 :               if (known_eq (nunits, offset_nunits))
   11090              :                 {
   11091          134 :                   new_stmt = vect_build_one_gather_load_call
   11092          134 :                                (vinfo, stmt_info, slp_node, vectype, gsi,
   11093          134 :                                 ls.gs.decl, dataref_ptr, vec_offsets[i],
   11094              :                                 final_mask);
   11095          134 :                   data_ref = NULL_TREE;
   11096              :                 }
   11097          149 :               else if (known_eq (nunits, offset_nunits * 2))
   11098              :                 {
   11099              :                   /* We have an offset vector with half the number of
   11100              :                      lanes but the builtins will produce full vectype
   11101              :                      data with just the lower lanes filled.  */
   11102           63 :                   new_stmt = vect_build_one_gather_load_call
   11103          126 :                                (vinfo, stmt_info, slp_node, vectype, gsi,
   11104           63 :                                 ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
   11105              :                                 final_mask);
   11106           63 :                   tree low = make_ssa_name (vectype);
   11107           63 :                   gimple_set_lhs (new_stmt, low);
   11108           63 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11109              : 
   11110              :                   /* Now move the upper half of final_mask into its low half.  */
   11111           63 :                   if (final_mask
   11112           63 :                       && !SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (final_mask))))
   11113              :                     {
   11114           11 :                       int count = nunits.to_constant ();
   11115           11 :                       vec_perm_builder sel (count, count, 1);
   11116           11 :                       sel.quick_grow (count);
   11117           87 :                       for (int i = 0; i < count; ++i)
   11118           76 :                         sel[i] = i | (count / 2);
   11119           11 :                       vec_perm_indices indices (sel, 2, count);
   11120           11 :                       tree perm_mask = vect_gen_perm_mask_checked
   11121           11 :                                          (TREE_TYPE (final_mask), indices);
   11122           11 :                       new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
   11123              :                                                       final_mask, final_mask,
   11124              :                                                       perm_mask);
   11125           11 :                       final_mask = make_ssa_name (TREE_TYPE (final_mask));
   11126           11 :                       gimple_set_lhs (new_stmt, final_mask);
   11127           11 :                       vect_finish_stmt_generation (vinfo, stmt_info,
   11128              :                                                    new_stmt, gsi);
   11129           11 :                     }
   11130           52 :                   else if (final_mask)
   11131              :                     {
   11132           24 :                       new_stmt = gimple_build_assign (NULL_TREE,
   11133              :                                                       VEC_UNPACK_HI_EXPR,
   11134              :                                                       final_mask);
   11135           24 :                       final_mask = make_ssa_name
   11136           24 :                                     (truth_type_for (offset_vectype));
   11137           24 :                       gimple_set_lhs (new_stmt, final_mask);
   11138           24 :                       vect_finish_stmt_generation (vinfo, stmt_info,
   11139              :                                                    new_stmt, gsi);
   11140              :                     }
   11141              : 
   11142           63 :                   new_stmt = vect_build_one_gather_load_call
   11143          126 :                                (vinfo, stmt_info, slp_node, vectype, gsi,
   11144              :                                 ls.gs.decl, dataref_ptr,
   11145           63 :                                 vec_offsets[2 * i + 1], final_mask);
   11146           63 :                   tree high = make_ssa_name (vectype);
   11147           63 :                   gimple_set_lhs (new_stmt, high);
   11148           63 :                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11149              : 
   11150              :                   /* compose low + high.  */
   11151           63 :                   int count = nunits.to_constant ();
   11152           63 :                   vec_perm_builder sel (count, count, 1);
   11153           63 :                   sel.quick_grow (count);
   11154          647 :                   for (int i = 0; i < count; ++i)
   11155          584 :                     sel[i] = i < count / 2 ? i : i + count / 2;
   11156           63 :                   vec_perm_indices indices (sel, 2, count);
   11157           63 :                   tree perm_mask
   11158           63 :                     = vect_gen_perm_mask_checked (vectype, indices);
   11159           63 :                   new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
   11160              :                                                   low, high, perm_mask);
   11161           63 :                   data_ref = NULL_TREE;
   11162           63 :                 }
   11163           86 :               else if (known_eq (nunits * 2, offset_nunits))
   11164              :                 {
   11165              :                   /* We have an offset vector with double the number of
   11166              :                      lanes.  Select the low/high part accordingly.  */
   11167           86 :                   vec_offset = vec_offsets[i / 2];
   11168           86 :                   if (i & 1)
   11169              :                     {
   11170           43 :                       int count = offset_nunits.to_constant ();
   11171           43 :                       vec_perm_builder sel (count, count, 1);
   11172           43 :                       sel.quick_grow (count);
   11173          463 :                       for (int i = 0; i < count; ++i)
   11174          420 :                         sel[i] = i | (count / 2);
   11175           43 :                       vec_perm_indices indices (sel, 2, count);
   11176           43 :                       tree perm_mask = vect_gen_perm_mask_checked
   11177           43 :                                          (TREE_TYPE (vec_offset), indices);
   11178           43 :                       new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
   11179              :                                                       vec_offset, vec_offset,
   11180              :                                                       perm_mask);
   11181           43 :                       vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
   11182           43 :                       gimple_set_lhs (new_stmt, vec_offset);
   11183           43 :                       vect_finish_stmt_generation (vinfo, stmt_info,
   11184              :                                                    new_stmt, gsi);
   11185           43 :                     }
   11186           86 :                   new_stmt = vect_build_one_gather_load_call
   11187           86 :                                (vinfo, stmt_info, slp_node, vectype, gsi,
   11188              :                                 ls.gs.decl,
   11189              :                                 dataref_ptr, vec_offset, final_mask);
   11190           86 :                   data_ref = NULL_TREE;
   11191              :                 }
   11192              :               else
   11193            0 :                 gcc_unreachable ();
   11194              :             }
   11195              :           else
   11196              :             {
   11197              :               /* Emulated gather-scatter.  */
   11198         2894 :               gcc_assert (!final_mask);
   11199         2894 :               unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
   11200         2894 :               if (costing_p)
   11201              :                 {
   11202              :                   /* For emulated gathers N offset vector element extracts
   11203              :                      (assuming the offset add is consumed by the load).  */
   11204         2197 :                   inside_cost = record_stmt_cost (cost_vec, 1, vec_deconstruct,
   11205              :                                                   slp_node, 0, vect_body);
   11206              :                   /* N scalar loads plus gathering them into a
   11207              :                      vector.  */
   11208         2197 :                   inside_cost
   11209         2197 :                     = record_stmt_cost (cost_vec, const_nunits, scalar_load,
   11210              :                                         slp_node, 0, vect_body);
   11211         2197 :                   inside_cost
   11212         2197 :                     = record_stmt_cost (cost_vec, 1, vec_construct,
   11213              :                                         slp_node, 0, vect_body);
   11214         2197 :                   continue;
   11215              :                 }
   11216          697 :               tree offset_vectype = TREE_TYPE (vec_offsets[0]);
   11217          697 :               unsigned HOST_WIDE_INT const_offset_nunits
   11218          697 :                 = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
   11219          697 :               vec<constructor_elt, va_gc> *ctor_elts;
   11220          697 :               vec_alloc (ctor_elts, const_nunits);
   11221          697 :               gimple_seq stmts = NULL;
   11222              :               /* We support offset vectors with more elements
   11223              :                  than the data vector for now.  */
   11224          697 :               unsigned HOST_WIDE_INT factor
   11225              :                 = const_offset_nunits / const_nunits;
   11226          697 :               vec_offset = vec_offsets[i / factor];
   11227          697 :               unsigned elt_offset = (i % factor) * const_nunits;
   11228          697 :               tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
   11229          697 :               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
   11230          697 :               tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
   11231         2825 :               for (unsigned k = 0; k < const_nunits; ++k)
   11232              :                 {
   11233         2128 :                   tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
   11234              :                                           bitsize_int (k + elt_offset));
   11235         6384 :                   tree idx = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
   11236         2128 :                                            vec_offset, TYPE_SIZE (idx_type),
   11237              :                                            boff);
   11238         2128 :                   idx = gimple_convert (&stmts, sizetype, idx);
   11239         2128 :                   idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
   11240         2128 :                   tree ptr = gimple_build (&stmts, PLUS_EXPR,
   11241         2128 :                                            TREE_TYPE (dataref_ptr),
   11242              :                                            dataref_ptr, idx);
   11243         2128 :                   ptr = gimple_convert (&stmts, ptr_type_node, ptr);
   11244         2128 :                   tree elt = make_ssa_name (TREE_TYPE (vectype));
   11245         2128 :                   tree ref = build2 (MEM_REF, ltype, ptr,
   11246              :                                      build_int_cst (ref_type, 0));
   11247         2128 :                   new_stmt = gimple_build_assign (elt, ref);
   11248         4256 :                   gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
   11249         2128 :                   gimple_seq_add_stmt (&stmts, new_stmt);
   11250         2128 :                   CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
   11251              :                 }
   11252          697 :               gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   11253          697 :               new_stmt = gimple_build_assign (NULL_TREE,
   11254              :                                               build_constructor (vectype,
   11255              :                                                                  ctor_elts));
   11256          697 :               data_ref = NULL_TREE;
   11257              :             }
   11258              : 
   11259          980 :           vec_dest = vect_create_destination_var (scalar_dest, vectype);
   11260              :           /* DATA_REF is null if we've already built the statement.  */
   11261          980 :           if (data_ref)
   11262              :             {
   11263              :               vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   11264              :               new_stmt = gimple_build_assign (vec_dest, data_ref);
   11265              :             }
   11266         1960 :           new_temp = (need_zeroing
   11267          980 :                       ? make_ssa_name (vectype)
   11268          980 :                       : make_ssa_name (vec_dest, new_stmt));
   11269          980 :           gimple_set_lhs (new_stmt, new_temp);
   11270          980 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11271              : 
   11272              :           /* If we need to explicitly zero inactive elements emit a
   11273              :              VEC_COND_EXPR that does so.  */
   11274          980 :           if (need_zeroing)
   11275              :             {
   11276            0 :               vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
   11277              :                                                  vectype);
   11278              : 
   11279            0 :               tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
   11280            0 :               new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
   11281              :                                               final_mask, new_temp, vec_els);
   11282            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11283            0 :               new_temp = new_temp2;
   11284              :             }
   11285              : 
   11286          980 :           if (ls.ls_type)
   11287              :             {
   11288            0 :               new_stmt = gimple_build_assign (make_ssa_name
   11289              :                                               (original_vectype),
   11290              :                                               VIEW_CONVERT_EXPR,
   11291              :                                               build1 (VIEW_CONVERT_EXPR,
   11292              :                                                       original_vectype,
   11293              :                                                       new_temp));
   11294            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11295              :             }
   11296              : 
   11297              :           /* Store vector loads in the corresponding SLP_NODE.  */
   11298          980 :           if (!costing_p)
   11299              :             {
   11300          980 :               if (ls.slp_perm)
   11301            0 :                 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
   11302              :               else
   11303          980 :                 slp_node->push_vec_def (new_stmt);
   11304              :             }
   11305              :         }
   11306              : 
   11307         2970 :       if (ls.slp_perm)
   11308              :         {
   11309            0 :           if (costing_p)
   11310              :             {
   11311            0 :               gcc_assert (ls.n_perms != -1U);
   11312            0 :               inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
   11313              :                                                slp_node, 0, vect_body);
   11314              :             }
   11315              :           else
   11316              :             {
   11317            0 :               unsigned n_perms2;
   11318            0 :               vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
   11319              :                                             false, &n_perms2);
   11320            0 :               gcc_assert (ls.n_perms == n_perms2);
   11321              :             }
   11322              :         }
   11323              : 
   11324         2970 :       if (costing_p)
   11325              :         {
   11326         2207 :           if (dump_enabled_p ())
   11327          315 :             dump_printf_loc (MSG_NOTE, vect_location,
   11328              :                              "vect_model_load_cost: inside_cost = %u, "
   11329              :                              "prologue_cost = %u .\n",
   11330              :                              inside_cost, prologue_cost);
   11331         2207 :           SLP_TREE_TYPE (slp_node) = load_vec_info_type;
   11332         2207 :           slp_node->data = new vect_load_store_data (std::move (ls));
   11333              :         }
   11334         2970 :       return true;
   11335         2970 :     }
   11336              : 
   11337       570897 :   aggr_type = vectype;
   11338       570897 :   if (!costing_p)
   11339       161649 :     bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
   11340              :                                         memory_access_type, loop_lens);
   11341              : 
   11342       570897 :   poly_uint64 group_elt = 0;
   11343       570897 :   unsigned int inside_cost = 0, prologue_cost = 0;
   11344              :   /* For costing some adjacent vector loads, we'd like to cost with
   11345              :      the total number of them once instead of costing them one by one.  */
   11346       570897 :   unsigned int n_adjacent_loads = 0;
   11347              : 
   11348              :   /* 1. Create the vector or array pointer update chain.  */
   11349       570897 :   if (!costing_p)
   11350              :     {
   11351       161649 :       bool simd_lane_access_p
   11352       161649 :           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
   11353       161649 :       if (simd_lane_access_p
   11354         1629 :           && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
   11355         1629 :           && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
   11356         1629 :           && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
   11357         1629 :           && integer_zerop (DR_INIT (first_dr_info->dr))
   11358         1629 :           && alias_sets_conflict_p (get_alias_set (aggr_type),
   11359         1629 :                                     get_alias_set (TREE_TYPE (ref_type)))
   11360       161649 :           && (alignment_support_scheme == dr_aligned
   11361         1629 :               || alignment_support_scheme == dr_unaligned_supported))
   11362              :         {
   11363         1629 :           dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
   11364         1629 :           dataref_offset = build_int_cst (ref_type, 0);
   11365              :         }
   11366       160020 :       else if (diff_first_stmt_info)
   11367              :         {
   11368         3564 :           dataref_ptr
   11369         3564 :             = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
   11370              :                                         aggr_type, at_loop, offset, &dummy,
   11371              :                                         gsi, &ptr_incr, simd_lane_access_p,
   11372              :                                         bump);
   11373              :           /* Adjust the pointer by the difference to first_stmt.  */
   11374         3564 :           data_reference_p ptrdr
   11375              :             = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
   11376         3564 :           tree diff = fold_convert (sizetype,
   11377              :                                     size_binop (MINUS_EXPR,
   11378              :                                                 DR_INIT (first_dr_info->dr),
   11379              :                                                 DR_INIT (ptrdr)));
   11380         3564 :           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   11381              :                                          stmt_info, diff);
   11382         3564 :           if (alignment_support_scheme == dr_explicit_realign)
   11383              :             {
   11384            0 :               msq = vect_setup_realignment (vinfo, first_stmt_info_for_drptr,
   11385              :                                             vectype, gsi,
   11386              :                                             &realignment_token,
   11387              :                                             alignment_support_scheme,
   11388              :                                             dataref_ptr, &at_loop);
   11389            0 :               gcc_assert (!compute_in_loop);
   11390              :             }
   11391              :         }
   11392              :       else
   11393       156456 :         dataref_ptr
   11394       156456 :           = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
   11395              :                                       at_loop,
   11396              :                                       offset, &dummy, gsi, &ptr_incr,
   11397              :                                       simd_lane_access_p, bump);
   11398              :     }
   11399              :   else if (!costing_p)
   11400              :     {
   11401              :       gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
   11402              :       if (dataref_offset)
   11403              :         dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
   11404              :       else
   11405              :         dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   11406              :                                        stmt_info, bump);
   11407              :     }
   11408              : 
   11409       570897 :   auto_vec<tree> dr_chain;
   11410       570897 :   if (grouped_load || ls.slp_perm)
   11411        52994 :     dr_chain.create (vec_num);
   11412              : 
   11413              :   gimple *new_stmt = NULL;
   11414      1484289 :   for (i = 0; i < vec_num; i++)
   11415              :     {
   11416       913392 :       tree final_mask = NULL_TREE;
   11417       913392 :       tree final_len = NULL_TREE;
   11418       913392 :       tree bias = NULL_TREE;
   11419              : 
   11420       913392 :       if (!costing_p)
   11421              :         {
   11422       253908 :           if (mask_node)
   11423          709 :             vec_mask = vec_masks[i];
   11424       253908 :           if (loop_masks)
   11425           48 :             final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
   11426              :                                              vec_num, vectype, i);
   11427       253908 :           if (vec_mask)
   11428          709 :             final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
   11429              :                                            final_mask, vec_mask, gsi);
   11430              : 
   11431       253908 :           if (i > 0)
   11432        92259 :             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
   11433              :                                            gsi, stmt_info, bump);
   11434              :         }
   11435              : 
   11436              :       /* 2. Create the vector-load in the loop.  */
   11437       913392 :       switch (alignment_support_scheme)
   11438              :         {
   11439       913392 :         case dr_aligned:
   11440       913392 :         case dr_unaligned_supported:
   11441       913392 :           {
   11442       913392 :             if (costing_p)
   11443              :               break;
   11444              : 
   11445       253908 :             unsigned int misalign;
   11446       253908 :             unsigned HOST_WIDE_INT align;
   11447       253908 :             align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
   11448       253908 :             if (alignment_support_scheme == dr_aligned)
   11449              :               misalign = 0;
   11450       162864 :             else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
   11451              :               {
   11452       123469 :                 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
   11453       123469 :                 misalign = 0;
   11454              :               }
   11455              :             else
   11456        39395 :               misalign = misalignment;
   11457       253908 :             if (dataref_offset == NULL_TREE
   11458       251781 :                 && TREE_CODE (dataref_ptr) == SSA_NAME)
   11459       171229 :               set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
   11460              :                                       misalign);
   11461       253908 :             align = least_bit_hwi (misalign | align);
   11462              : 
   11463              :             /* Compute IFN when LOOP_LENS or final_mask is valid.  */
   11464       253908 :             machine_mode vmode = TYPE_MODE (vectype);
   11465       253908 :             machine_mode new_vmode = vmode;
   11466       253908 :             internal_fn partial_ifn = IFN_LAST;
   11467       253908 :             if (loop_lens)
   11468              :               {
   11469            0 :                 opt_machine_mode new_ovmode
   11470            0 :                   = get_len_load_store_mode (vmode, true, &partial_ifn);
   11471            0 :                 new_vmode = new_ovmode.require ();
   11472            0 :                 unsigned factor
   11473            0 :                   = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
   11474            0 :                 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
   11475              :                                                vec_num, vectype, i, factor, true);
   11476              :               }
   11477       253908 :             else if (final_mask)
   11478              :               {
   11479          737 :                 if (!can_vec_mask_load_store_p (vmode,
   11480          737 :                                                 TYPE_MODE
   11481              :                                                   (TREE_TYPE (final_mask)),
   11482              :                                                 true, &partial_ifn))
   11483            0 :                   gcc_unreachable ();
   11484              :               }
   11485              : 
   11486       253908 :             if (partial_ifn == IFN_MASK_LEN_LOAD)
   11487              :               {
   11488            0 :                 if (!final_len)
   11489              :                   {
   11490              :                     /* Pass VF value to 'len' argument of
   11491              :                        MASK_LEN_LOAD if LOOP_LENS is invalid.  */
   11492            0 :                     final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
   11493              :                   }
   11494            0 :                 if (!final_mask)
   11495              :                   {
   11496              :                     /* Pass all ones value to 'mask' argument of
   11497              :                        MASK_LEN_LOAD if final_mask is invalid.  */
   11498            0 :                     mask_vectype = truth_type_for (vectype);
   11499            0 :                     final_mask = build_minus_one_cst (mask_vectype);
   11500              :                   }
   11501              :               }
   11502       253908 :             if (final_len)
   11503              :               {
   11504            0 :                 signed char biasval
   11505            0 :                   = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   11506            0 :                 bias = build_int_cst (intQI_type_node, biasval);
   11507              :               }
   11508              : 
   11509       253908 :             tree vec_els;
   11510              : 
   11511       253908 :             if (final_len)
   11512              :               {
   11513            0 :                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
   11514            0 :                 gcall *call;
   11515              : 
   11516              :                 /* Need conversion if the vectype is punned by VnQI.  */
   11517            0 :                 els_vectype = vectype;
   11518            0 :                 if (vmode != new_vmode)
   11519            0 :                   els_vectype
   11520            0 :                     = build_vector_type_for_mode (unsigned_intQI_type_node,
   11521              :                                                   new_vmode);
   11522            0 :                 vec_els = vect_get_mask_load_else (maskload_elsval,
   11523              :                                                    els_vectype);
   11524              : 
   11525            0 :                 if (partial_ifn == IFN_MASK_LEN_LOAD)
   11526              :                   {
   11527            0 :                     if (type_mode_padding_p
   11528            0 :                         && maskload_elsval != MASK_LOAD_ELSE_ZERO)
   11529            0 :                       need_zeroing = true;
   11530            0 :                     call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
   11531              :                                                        6, dataref_ptr, ptr,
   11532              :                                                        final_mask, vec_els,
   11533              :                                                        final_len, bias);
   11534              :                   }
   11535              :                 else
   11536            0 :                   call = gimple_build_call_internal (IFN_LEN_LOAD, 5,
   11537              :                                                      dataref_ptr, ptr,
   11538              :                                                      vec_els, final_len,
   11539              :                                                      bias);
   11540            0 :                 gimple_call_set_nothrow (call, true);
   11541            0 :                 new_stmt = call;
   11542            0 :                 data_ref = NULL_TREE;
   11543              : 
   11544              :                 /* Need conversion if it's wrapped with VnQI.  */
   11545            0 :                 if (vmode != new_vmode)
   11546              :                   {
   11547            0 :                     tree new_vtype
   11548            0 :                       = build_vector_type_for_mode (unsigned_intQI_type_node,
   11549              :                                                     new_vmode);
   11550            0 :                     tree var = vect_get_new_ssa_name (new_vtype,
   11551              :                                                       vect_simple_var);
   11552            0 :                     gimple_set_lhs (call, var);
   11553            0 :                     vect_finish_stmt_generation (vinfo, stmt_info, call,
   11554              :                                                  gsi);
   11555            0 :                     tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
   11556            0 :                     new_stmt = gimple_build_assign (vec_dest,
   11557              :                                                     VIEW_CONVERT_EXPR, op);
   11558              :                   }
   11559              :               }
   11560       253908 :             else if (final_mask)
   11561              :               {
   11562          737 :                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
   11563          737 :                 vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
   11564          737 :                 if (type_mode_padding_p
   11565          737 :                     && maskload_elsval != MASK_LOAD_ELSE_ZERO)
   11566            0 :                   need_zeroing = true;
   11567          737 :                 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
   11568              :                                                           dataref_ptr, ptr,
   11569              :                                                           final_mask,
   11570              :                                                           vec_els);
   11571          737 :                 gimple_call_set_nothrow (call, true);
   11572          737 :                 new_stmt = call;
   11573          737 :                 data_ref = NULL_TREE;
   11574              :               }
   11575              :             else
   11576              :               {
   11577       253171 :                 tree ltype = vectype;
   11578       253171 :                 tree new_vtype = NULL_TREE;
   11579       253171 :                 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
   11580       253171 :                 unsigned HOST_WIDE_INT dr_size
   11581       253171 :                   = vect_get_scalar_dr_size (first_dr_info);
   11582       253171 :                 poly_int64 off = 0;
   11583       253171 :                 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   11584         1445 :                   off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
   11585       253171 :                 unsigned int vect_align
   11586       253171 :                   = vect_known_alignment_in_bytes (first_dr_info, vectype,
   11587       253171 :                                                    off);
   11588              :                 /* Try to use a single smaller load when we are about
   11589              :                    to load excess elements compared to the unrolled
   11590              :                    scalar loop.  */
   11591       253171 :                 if (known_gt ((i + 1) * nunits,
   11592              :                               (group_size * vf - gap)))
   11593              :                   {
   11594         6949 :                     poly_uint64 remain = ((group_size * vf - gap) - i * nunits);
   11595         6949 :                     if (known_ge ((i + 1) * nunits - (group_size * vf - gap),
   11596              :                                   nunits))
   11597              :                       /* DR will be unused.  */
   11598              :                       ltype = NULL_TREE;
   11599         2290 :                     else if (known_ge (vect_align,
   11600              :                                        tree_to_poly_uint64
   11601              :                                          (TYPE_SIZE_UNIT (vectype))))
   11602              :                       /* Aligned access to excess elements is OK if
   11603              :                          at least one element is accessed in the
   11604              :                          scalar loop.  */
   11605              :                       ;
   11606         1899 :                     else if (known_gt (vect_align,
   11607              :                                        ((nunits - remain) * dr_size)))
   11608              :                       /* Aligned access to the gap area when there's
   11609              :                          at least one element in it is OK.  */
   11610              :                       ;
   11611              :                     else
   11612              :                       {
   11613              :                         /* remain should now be > 0 and < nunits.  */
   11614         1896 :                         unsigned num;
   11615         1896 :                         if (known_ne (remain, 0u)
   11616         1896 :                             && constant_multiple_p (nunits, remain, &num))
   11617              :                           {
   11618         1433 :                             tree ptype;
   11619         1433 :                             new_vtype
   11620         1433 :                               = vector_vector_composition_type (vectype, num,
   11621              :                                                                 &ptype);
   11622         1433 :                             if (new_vtype)
   11623         1433 :                               ltype = ptype;
   11624              :                           }
   11625              :                         /* Else use multiple loads or a masked load?  */
   11626              :                         /* For loop vectorization we now should have
   11627              :                            an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
   11628              :                            set.  */
   11629         1896 :                         if (loop_vinfo)
   11630         1645 :                           gcc_assert (new_vtype
   11631              :                                       || LOOP_VINFO_PEELING_FOR_GAPS
   11632              :                                            (loop_vinfo));
   11633              :                         /* But still reduce the access size to the next
   11634              :                            required power-of-two so peeling a single
   11635              :                            scalar iteration is sufficient.  */
   11636         1896 :                         unsigned HOST_WIDE_INT cremain;
   11637         1896 :                         if (remain.is_constant (&cremain))
   11638              :                           {
   11639         1896 :                             unsigned HOST_WIDE_INT cpart_size
   11640         1896 :                               = 1 << ceil_log2 (cremain);
   11641         1896 :                             if (known_gt (nunits, cpart_size)
   11642         1896 :                                 && constant_multiple_p (nunits, cpart_size,
   11643              :                                                         &num))
   11644              :                               {
   11645         1445 :                                 tree ptype;
   11646         1445 :                                 new_vtype
   11647         2890 :                                   = vector_vector_composition_type (vectype,
   11648         1445 :                                                                     num,
   11649              :                                                                     &ptype);
   11650         1445 :                                 if (new_vtype)
   11651         1445 :                                   ltype = ptype;
   11652              :                               }
   11653              :                           }
   11654              :                       }
   11655              :                   }
   11656       253171 :                 tree offset = (dataref_offset ? dataref_offset
   11657       251044 :                                : build_int_cst (ref_type, 0));
   11658       253171 :                 if (!ltype)
   11659              :                   ;
   11660       248512 :                 else if (ltype != vectype
   11661       248512 :                          && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   11662              :                   {
   11663           25 :                     poly_uint64 gap_offset
   11664           25 :                       = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
   11665           25 :                          - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
   11666           25 :                     tree gapcst = build_int_cstu (ref_type, gap_offset);
   11667           25 :                     offset = size_binop (PLUS_EXPR, offset, gapcst);
   11668              :                   }
   11669       253171 :                 if (ltype)
   11670              :                   {
   11671       248512 :                     data_ref = fold_build2 (MEM_REF, ltype,
   11672              :                                             dataref_ptr, offset);
   11673       248512 :                     if (alignment_support_scheme == dr_aligned
   11674       248512 :                         && align >= TYPE_ALIGN_UNIT (ltype))
   11675              :                       ;
   11676              :                     else
   11677       161158 :                       TREE_TYPE (data_ref)
   11678       322316 :                         = build_aligned_type (TREE_TYPE (data_ref),
   11679              :                                               align * BITS_PER_UNIT);
   11680              :                   }
   11681       253171 :                 if (!ltype)
   11682         4659 :                   data_ref = build_constructor (vectype, NULL);
   11683       248512 :                 else if (ltype != vectype)
   11684              :                   {
   11685         1445 :                     vect_copy_ref_info (data_ref,
   11686         1445 :                                         DR_REF (first_dr_info->dr));
   11687         1445 :                     tree tem = make_ssa_name (ltype);
   11688         1445 :                     new_stmt = gimple_build_assign (tem, data_ref);
   11689         1445 :                     vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
   11690              :                                                  gsi);
   11691         1445 :                     data_ref = NULL;
   11692         1445 :                     vec<constructor_elt, va_gc> *v;
   11693              :                     /* We've computed 'num' above to statically two
   11694              :                        or via constant_multiple_p.  */
   11695         1445 :                     unsigned num
   11696         1445 :                       = (exact_div (tree_to_poly_uint64
   11697         1445 :                                       (TYPE_SIZE_UNIT (vectype)),
   11698              :                                     tree_to_poly_uint64
   11699         1445 :                                       (TYPE_SIZE_UNIT (ltype)))
   11700         1445 :                          .to_constant ());
   11701         1445 :                     vec_alloc (v, num);
   11702         1445 :                     if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   11703              :                       {
   11704           62 :                         while (--num)
   11705           62 :                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   11706              :                                                   build_zero_cst (ltype));
   11707           25 :                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
   11708              :                       }
   11709              :                     else
   11710              :                       {
   11711         1420 :                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
   11712         1420 :                         while (--num)
   11713         3194 :                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
   11714              :                                                   build_zero_cst (ltype));
   11715              :                       }
   11716         1445 :                     gcc_assert (new_vtype != NULL_TREE);
   11717         1445 :                     if (new_vtype == vectype)
   11718         1413 :                       new_stmt
   11719         1413 :                         = gimple_build_assign (vec_dest,
   11720              :                                                build_constructor (vectype, v));
   11721              :                     else
   11722              :                       {
   11723           32 :                         tree new_vname = make_ssa_name (new_vtype);
   11724           32 :                         new_stmt
   11725           32 :                           = gimple_build_assign (new_vname,
   11726              :                                                  build_constructor (new_vtype,
   11727              :                                                                     v));
   11728           32 :                         vect_finish_stmt_generation (vinfo, stmt_info,
   11729              :                                                      new_stmt, gsi);
   11730           32 :                         new_stmt
   11731           32 :                           = gimple_build_assign (vec_dest,
   11732              :                                                  build1 (VIEW_CONVERT_EXPR,
   11733              :                                                          vectype, new_vname));
   11734              :                       }
   11735              :                   }
   11736              :               }
   11737              :             break;
   11738              :           }
   11739            0 :         case dr_explicit_realign:
   11740            0 :           {
   11741            0 :             if (costing_p)
   11742              :               break;
   11743            0 :             tree ptr, bump;
   11744              : 
   11745            0 :             tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
   11746              : 
   11747            0 :             if (compute_in_loop)
   11748            0 :               msq = vect_setup_realignment (vinfo, first_stmt_info, vectype,
   11749              :                                             gsi, &realignment_token,
   11750              :                                             dr_explicit_realign,
   11751              :                                             dataref_ptr, NULL);
   11752              : 
   11753            0 :             if (TREE_CODE (dataref_ptr) == SSA_NAME)
   11754            0 :               ptr = copy_ssa_name (dataref_ptr);
   11755              :             else
   11756            0 :               ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
   11757              :             // For explicit realign the target alignment should be
   11758              :             // known at compile time.
   11759            0 :             unsigned HOST_WIDE_INT align
   11760            0 :               = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
   11761            0 :             new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr,
   11762              :                                             build_int_cst
   11763            0 :                                               (TREE_TYPE (dataref_ptr),
   11764            0 :                                                -(HOST_WIDE_INT) align));
   11765            0 :             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11766            0 :             data_ref = build2 (MEM_REF, vectype,
   11767              :                                ptr, build_int_cst (ref_type, 0));
   11768            0 :             vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   11769            0 :             vec_dest = vect_create_destination_var (scalar_dest, vectype);
   11770            0 :             new_stmt = gimple_build_assign (vec_dest, data_ref);
   11771            0 :             new_temp = make_ssa_name (vec_dest, new_stmt);
   11772            0 :             gimple_assign_set_lhs (new_stmt, new_temp);
   11773            0 :             gimple_move_vops (new_stmt, stmt_info->stmt);
   11774            0 :             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11775            0 :             msq = new_temp;
   11776              : 
   11777            0 :             bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
   11778            0 :             bump = size_binop (MINUS_EXPR, bump, size_one_node);
   11779            0 :             ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
   11780              :                                    bump);
   11781            0 :             new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr,
   11782            0 :                                             build_int_cst (TREE_TYPE (ptr),
   11783            0 :                                                            -(HOST_WIDE_INT) align));
   11784            0 :             if (TREE_CODE (ptr) == SSA_NAME)
   11785            0 :               ptr = copy_ssa_name (ptr, new_stmt);
   11786              :             else
   11787            0 :               ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
   11788            0 :             gimple_assign_set_lhs (new_stmt, ptr);
   11789            0 :             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11790            0 :             data_ref = build2 (MEM_REF, vectype,
   11791              :                                ptr, build_int_cst (ref_type, 0));
   11792            0 :             break;
   11793              :           }
   11794            0 :         case dr_explicit_realign_optimized:
   11795            0 :           {
   11796            0 :             if (costing_p)
   11797              :               break;
   11798            0 :             if (TREE_CODE (dataref_ptr) == SSA_NAME)
   11799            0 :               new_temp = copy_ssa_name (dataref_ptr);
   11800              :             else
   11801            0 :               new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
   11802              :             // We should only be doing this if we know the target
   11803              :             // alignment at compile time.
   11804            0 :             unsigned HOST_WIDE_INT align
   11805            0 :               = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
   11806            0 :             new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, dataref_ptr,
   11807            0 :                                             build_int_cst (TREE_TYPE (dataref_ptr),
   11808            0 :                                                            -(HOST_WIDE_INT) align));
   11809            0 :             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11810            0 :             data_ref = build2 (MEM_REF, vectype, new_temp,
   11811              :                                build_int_cst (ref_type, 0));
   11812            0 :             break;
   11813              :           }
   11814            0 :         default:
   11815            0 :         gcc_unreachable ();
   11816              :         }
   11817              : 
   11818              :       /* One common place to cost the above vect load for different
   11819              :          alignment support schemes.  */
   11820       913392 :       if (costing_p)
   11821              :         {
   11822              :           /* For the prologue cost for realign,
   11823              :              we only need to count it once for the whole group.  */
   11824       659484 :           bool first_stmt_info_p = first_stmt_info == stmt_info;
   11825       659484 :           bool add_realign_cost = first_stmt_info_p && i == 0;
   11826       659484 :           if (memory_access_type == VMAT_CONTIGUOUS
   11827       659484 :               || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   11828              :             {
   11829              :               /* Leave realign cases alone to keep them simple.  */
   11830       659484 :               if (alignment_support_scheme == dr_explicit_realign_optimized
   11831              :                   || alignment_support_scheme == dr_explicit_realign)
   11832            0 :                 vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
   11833              :                                     alignment_support_scheme, misalignment,
   11834              :                                     add_realign_cost, &inside_cost,
   11835              :                                     &prologue_cost, cost_vec, cost_vec,
   11836              :                                     true);
   11837              :               else
   11838       659484 :                 n_adjacent_loads++;
   11839              :             }
   11840              :         }
   11841              :       else
   11842              :         {
   11843       253908 :           vec_dest = vect_create_destination_var (scalar_dest, vectype);
   11844              :           /* DATA_REF is null if we've already built the statement.  */
   11845       253908 :           if (data_ref)
   11846              :             {
   11847       251726 :               vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
   11848       251726 :               new_stmt = gimple_build_assign (vec_dest, data_ref);
   11849              :             }
   11850              : 
   11851       507816 :           new_temp = (need_zeroing
   11852       253908 :                       ? make_ssa_name (vectype)
   11853       253908 :                       : make_ssa_name (vec_dest, new_stmt));
   11854       253908 :           gimple_set_lhs (new_stmt, new_temp);
   11855       253908 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11856              : 
   11857              :           /* If we need to explicitly zero inactive elements emit a
   11858              :              VEC_COND_EXPR that does so.  */
   11859       253908 :           if (need_zeroing)
   11860              :             {
   11861            0 :               vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
   11862              :                                                  vectype);
   11863              : 
   11864            0 :               tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
   11865            0 :               new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
   11866              :                                               final_mask, new_temp, vec_els);
   11867            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
   11868              :                                            gsi);
   11869            0 :               new_temp = new_temp2;
   11870              :             }
   11871              :         }
   11872              : 
   11873              :       /* 3. Handle explicit realignment if necessary/supported.
   11874              :          Create in loop:
   11875              :          vec_dest = realign_load (msq, lsq, realignment_token)  */
   11876       913392 :       if (!costing_p
   11877       253908 :           && (alignment_support_scheme == dr_explicit_realign_optimized
   11878              :               || alignment_support_scheme == dr_explicit_realign))
   11879              :         {
   11880            0 :           lsq = gimple_assign_lhs (new_stmt);
   11881            0 :           if (!realignment_token)
   11882            0 :             realignment_token = dataref_ptr;
   11883            0 :           vec_dest = vect_create_destination_var (scalar_dest, vectype);
   11884            0 :           new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
   11885              :                                           lsq, realignment_token);
   11886            0 :           new_temp = make_ssa_name (vec_dest, new_stmt);
   11887            0 :           gimple_assign_set_lhs (new_stmt, new_temp);
   11888            0 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   11889              : 
   11890            0 :           if (alignment_support_scheme == dr_explicit_realign_optimized)
   11891              :             {
   11892            0 :               gcc_assert (phi);
   11893            0 :               if (i == vec_num - 1)
   11894            0 :                 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
   11895              :                              UNKNOWN_LOCATION);
   11896              :               msq = lsq;
   11897              :             }
   11898              :         }
   11899              : 
   11900       913392 :       if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
   11901              :         {
   11902         5932 :           if (costing_p)
   11903         4487 :             inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
   11904              :                                             slp_node, 0, vect_body);
   11905              :           else
   11906              :             {
   11907         1445 :               tree perm_mask = perm_mask_for_reverse (vectype);
   11908         1445 :               new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
   11909              :                                                perm_mask, stmt_info, gsi);
   11910         1445 :               new_stmt = SSA_NAME_DEF_STMT (new_temp);
   11911              :             }
   11912              :         }
   11913              : 
   11914              :       /* Collect vector loads and later create their permutation in
   11915              :          vect_transform_slp_perm_load.  */
   11916       913392 :       if (!costing_p && (grouped_load || ls.slp_perm))
   11917        73503 :         dr_chain.quick_push (new_temp);
   11918              : 
   11919              :       /* Store vector loads in the corresponding SLP_NODE.  */
   11920       253908 :       if (!costing_p && !ls.slp_perm)
   11921       180405 :         slp_node->push_vec_def (new_stmt);
   11922              : 
   11923              :       /* With SLP permutation we load the gaps as well, without
   11924              :          we need to skip the gaps after we manage to fully load
   11925              :          all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
   11926       913392 :       group_elt += nunits;
   11927       913392 :       if (!costing_p
   11928       253908 :           && maybe_ne (group_gap_adj, 0U)
   11929        46164 :           && !ls.slp_perm
   11930       934677 :           && known_eq (group_elt, group_size - group_gap_adj))
   11931              :         {
   11932        16623 :           poly_wide_int bump_val
   11933        16623 :             = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
   11934        16623 :           if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
   11935            0 :             bump_val = -bump_val;
   11936        16623 :           tree bump = wide_int_to_tree (sizetype, bump_val);
   11937        16623 :           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   11938              :                                          stmt_info, bump);
   11939        16623 :           group_elt = 0;
   11940        16623 :         }
   11941              :     }
   11942              :   /* Bump the vector pointer to account for a gap or for excess
   11943              :      elements loaded for a permuted SLP load.  */
   11944       570897 :   if (!costing_p
   11945       161649 :       && maybe_ne (group_gap_adj, 0U)
   11946       587974 :       && ls.slp_perm)
   11947              :     {
   11948          454 :       poly_wide_int bump_val
   11949          454 :         = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
   11950          454 :       if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
   11951            9 :         bump_val = -bump_val;
   11952          454 :       tree bump = wide_int_to_tree (sizetype, bump_val);
   11953          454 :       dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
   11954              :                                      stmt_info, bump);
   11955          454 :     }
   11956              : 
   11957       570897 :   if (ls.slp_perm)
   11958              :     {
   11959              :       /* For SLP we know we've seen all possible uses of dr_chain so
   11960              :          direct vect_transform_slp_perm_load to DCE the unused parts.
   11961              :          ???  This is a hack to prevent compile-time issues as seen
   11962              :          in PR101120 and friends.  */
   11963        52994 :       if (costing_p)
   11964              :         {
   11965        35920 :           gcc_assert (ls.n_perms != -1U && ls.n_loads != -1U);
   11966        35920 :           if (ls.n_perms != 0)
   11967        35413 :             inside_cost = record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
   11968              :                                             slp_node, 0, vect_body);
   11969        35920 :           if (n_adjacent_loads > 0)
   11970        35920 :             n_adjacent_loads = ls.n_loads;
   11971              :         }
   11972              :       else
   11973              :         {
   11974        17074 :           unsigned n_perms2, n_loads2;
   11975        17074 :           bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
   11976              :                                                   gsi, vf, false, &n_perms2,
   11977              :                                                   &n_loads2, true);
   11978        17074 :           gcc_assert (ok && ls.n_perms == n_perms2 && ls.n_loads == n_loads2);
   11979              :         }
   11980              :     }
   11981              : 
   11982       570897 :   if (costing_p)
   11983              :     {
   11984       409248 :       gcc_assert (memory_access_type == VMAT_CONTIGUOUS
   11985              :                   || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
   11986       409248 :       if (n_adjacent_loads > 0)
   11987       409248 :         vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
   11988              :                             alignment_support_scheme, misalignment, false,
   11989              :                             &inside_cost, &prologue_cost, cost_vec, cost_vec,
   11990              :                             true);
   11991       409248 :       if (dump_enabled_p ())
   11992        24108 :         dump_printf_loc (MSG_NOTE, vect_location,
   11993              :                          "vect_model_load_cost: inside_cost = %u, "
   11994              :                          "prologue_cost = %u .\n",
   11995              :                          inside_cost, prologue_cost);
   11996       409248 :       SLP_TREE_TYPE (slp_node) = load_vec_info_type;
   11997       409248 :       slp_node->data = new vect_load_store_data (std::move (ls));
   11998              :     }
   11999              : 
   12000       570897 :   return true;
   12001      1852610 : }
   12002              : 
   12003              : /* Function vect_is_simple_cond.
   12004              : 
   12005              :    Input:
   12006              :    VINFO, SLP_NODE - passed on to vect_is_simple_use and get_vectype_for_scalar_type.
   12007              :    COND - Condition that is checked for simple use.
   12008              :    VECTYPE - vector type used to derive *COMP_VECTYPE for invariant comparisons.
   12009              :    Output:
   12010              :    *COMP_VECTYPE - the vector type for the comparison.
   12011              :    *DTS - The def types for the arguments of the comparison.
   12012              : 
   12013              :    Returns whether a COND can be vectorized.  Checks whether
   12014              :    condition operands are supportable using vect_is_simple_use.  */
   12015              : 
   12016              : static bool
   12017        34899 : vect_is_simple_cond (tree cond, vec_info *vinfo,
   12018              :                      slp_tree slp_node, tree *comp_vectype,
   12019              :                      enum vect_def_type *dts, tree vectype)
   12020              : {
   12021        34899 :   tree lhs, rhs;
   12022        34899 :   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   12023        34899 :   slp_tree slp_op;
   12024              : 
   12025              :   /* Mask case: COND is a scalar boolean SSA name and must get a vector boolean type.  */
   12026        34899 :   if (TREE_CODE (cond) == SSA_NAME
   12027        34899 :       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
   12028              :     {
   12029        34887 :       if (!vect_is_simple_use (vinfo, slp_node, 0, &cond,
   12030              :                                &slp_op, &dts[0], comp_vectype)
   12031        34887 :           || !*comp_vectype
   12032        69763 :           || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
   12033              :         return false;
   12034              :       return true;
   12035              :     }
   12036              :   /* Otherwise COND has to be a comparison of two operands.  */
   12037           12 :   if (!COMPARISON_CLASS_P (cond))
   12038              :     return false;
   12039              : 
   12040            0 :   lhs = TREE_OPERAND (cond, 0);
   12041            0 :   rhs = TREE_OPERAND (cond, 1);
   12042              :   /* Each operand must be a simple-use SSA name or an INTEGER_CST, REAL_CST or FIXED_CST.  */
   12043            0 :   if (TREE_CODE (lhs) == SSA_NAME)
   12044              :     {
   12045            0 :       if (!vect_is_simple_use (vinfo, slp_node, 0,
   12046              :                                &lhs, &slp_op, &dts[0], &vectype1))
   12047              :         return false;
   12048              :     }
   12049            0 :   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
   12050            0 :            || TREE_CODE (lhs) == FIXED_CST)
   12051            0 :     dts[0] = vect_constant_def;
   12052              :   else
   12053              :     return false;
   12054              : 
   12055            0 :   if (TREE_CODE (rhs) == SSA_NAME)
   12056              :     {
   12057            0 :       if (!vect_is_simple_use (vinfo, slp_node, 1,
   12058              :                                &rhs, &slp_op, &dts[1], &vectype2))
   12059              :         return false;
   12060              :     }
   12061            0 :   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
   12062            0 :            || TREE_CODE (rhs) == FIXED_CST)
   12063            0 :     dts[1] = vect_constant_def;
   12064              :   else
   12065              :     return false;
   12066              :   /* Reject operand vector types that may differ in their number of elements.  */
   12067            0 :   if (vectype1 && vectype2
   12068            0 :       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
   12069            0 :                    TYPE_VECTOR_SUBPARTS (vectype2)))
   12070            0 :     return false;
   12071              : 
   12072            0 :   *comp_vectype = vectype1 ? vectype1 : vectype2;
   12073              :   /* Invariant comparison: neither operand provided a vector type.  */
   12074            0 :   if (! *comp_vectype)
   12075              :     {
   12076            0 :       tree scalar_type = TREE_TYPE (lhs);
   12077            0 :       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
   12078            0 :         *comp_vectype = truth_type_for (vectype);
   12079              :       else
   12080              :         {
   12081              :           /* If we can widen the comparison to match vectype do so.  */
   12082            0 :           if (INTEGRAL_TYPE_P (scalar_type)
   12083            0 :               && !slp_node
   12084            0 :               && tree_int_cst_lt (TYPE_SIZE (scalar_type),
   12085            0 :                                   TYPE_SIZE (TREE_TYPE (vectype))))
   12086            0 :             scalar_type = build_nonstandard_integer_type
   12087            0 :               (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
   12088            0 :           *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
   12089              :                                                        slp_node);
   12090              :         }
   12091              :     }
   12092              : 
   12093              :   return true;
   12094              : }
   12095              : 
   12096              : /* vectorizable_condition.
   12097              : 
   12098              :    Check if STMT_INFO is a conditional modify expression that can be vectorized.
   12099              :    If COST_VEC is passed, calculate costs but don't change anything,
   12100              :    otherwise, vectorize STMT_INFO: create a vectorized stmt using
   12101              :    VEC_COND_EXPR to replace it, and insert it at GSI.
   12102              : 
   12103              :    When STMT_INFO is vectorized as a nested cycle, the local for_reduction (set from vect_is_reduction) is true.
   12104              : 
   12105              :    Return true if STMT_INFO is vectorizable in this way.  */
   12106              : 
   12107              : static bool
   12108       676283 : vectorizable_condition (vec_info *vinfo,
   12109              :                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   12110              :                         slp_tree slp_node, stmt_vector_for_cost *cost_vec)
   12111              : {
   12112       676283 :   tree scalar_dest = NULL_TREE;
   12113       676283 :   tree vec_dest = NULL_TREE;
   12114       676283 :   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
   12115       676283 :   tree then_clause, else_clause;
   12116       676283 :   tree comp_vectype = NULL_TREE;
   12117       676283 :   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
   12118       676283 :   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   12119       676283 :   tree vec_compare;
   12120       676283 :   tree new_temp;
   12121       676283 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   12122       676283 :   enum vect_def_type dts[4]
   12123              :     = {vect_unknown_def_type, vect_unknown_def_type,
   12124              :        vect_unknown_def_type, vect_unknown_def_type};
   12125       676283 :   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
   12126       676283 :   int i;
   12127       676283 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   12128       676283 :   vec<tree> vec_oprnds0 = vNULL;
   12129       676283 :   vec<tree> vec_oprnds1 = vNULL;
   12130       676283 :   vec<tree> vec_oprnds2 = vNULL;
   12131       676283 :   vec<tree> vec_oprnds3 = vNULL;
   12132       676283 :   tree vec_cmp_type;
   12133       676283 :   bool masked = false;
   12134              : 
   12135       676283 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   12136              :     return false;
   12137              : 
   12138              :   /* Is vectorizable conditional operation?  */
   12139      1020128 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   12140       378715 :   if (!stmt)
   12141              :     return false;
   12142              : 
   12143       378715 :   code = gimple_assign_rhs_code (stmt);
   12144       378715 :   if (code != COND_EXPR)
   12145              :     return false;
   12146              : 
   12147        34899 :   int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
   12148        34899 :   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
   12149        34899 :   bool nested_cycle_p = false;
   12150        34899 :   bool for_reduction = vect_is_reduction (stmt_info);
   12151        34899 :   if (for_reduction)
   12152              :     {
   12153          614 :       if (SLP_TREE_LANES (slp_node) > 1)
   12154              :         return false;
   12155              :       /* ???  With a reduction path we do not get at the reduction info from
   12156              :          every stmt, use the conservative default setting then.  */
   12157          694 :       if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
   12158              :         {
   12159          596 :           vect_reduc_info reduc_info
   12160          596 :             = info_for_reduction (loop_vinfo, slp_node);
   12161          596 :           reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
   12162          596 :           nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
   12163              :                                                   stmt_info);
   12164              :         }
   12165              :     }
   12166              :   else
   12167              :     {
   12168        34285 :       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
   12169              :         return false;
   12170              :     }
   12171              : 
   12172        34899 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
   12173        34899 :   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   12174              : 
   12175        34899 :   int vec_num = vect_get_num_copies (vinfo, slp_node);
   12176              : 
   12177        34899 :   cond_expr = gimple_assign_rhs1 (stmt);
   12178        34899 :   gcc_assert (! COMPARISON_CLASS_P (cond_expr));
   12179              : 
   12180        34899 :   if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
   12181              :                             &comp_vectype, &dts[0], vectype)
   12182        34899 :       || !comp_vectype)
   12183              :     return false;
   12184              : 
   12185        34876 :   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
   12186        34876 :   slp_tree then_slp_node, else_slp_node;
   12187        34876 :   if (!vect_is_simple_use (vinfo, slp_node, 1 + op_adjust,
   12188              :                            &then_clause, &then_slp_node, &dts[2], &vectype1))
   12189              :     return false;
   12190        34876 :   if (!vect_is_simple_use (vinfo, slp_node, 2 + op_adjust,
   12191              :                            &else_clause, &else_slp_node, &dts[3], &vectype2))
   12192              :     return false;
   12193              : 
   12194        34876 :   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
   12195              :     return false;
   12196              : 
   12197        34876 :   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
   12198              :     return false;
   12199              : 
   12200        34876 :   masked = !COMPARISON_CLASS_P (cond_expr);
   12201        34876 :   vec_cmp_type = truth_type_for (comp_vectype);
   12202        34876 :   if (vec_cmp_type == NULL_TREE
   12203        69752 :       || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
   12204        34876 :                    TYPE_VECTOR_SUBPARTS (vec_cmp_type)))
   12205            0 :     return false;
   12206              : 
   12207        34876 :   cond_code = TREE_CODE (cond_expr);
   12208        34876 :   if (!masked)
   12209              :     {
   12210            0 :       cond_expr0 = TREE_OPERAND (cond_expr, 0);
   12211            0 :       cond_expr1 = TREE_OPERAND (cond_expr, 1);
   12212              :     }
   12213              : 
   12214              :   /* For conditional reductions, the "then" value needs to be the candidate
   12215              :      value calculated by this iteration while the "else" value needs to be
   12216              :      the result carried over from previous iterations.  If the COND_EXPR
   12217              :      is the other way around, we need to swap it.  */
   12218        34876 :   bool must_invert_cmp_result = false;
   12219        34876 :   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
   12220              :     {
   12221            0 :       if (masked)
   12222            0 :         must_invert_cmp_result = true;
   12223              :       else
   12224              :         {
   12225            0 :           bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
   12226            0 :           tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
   12227            0 :           if (new_code == ERROR_MARK)
   12228              :             must_invert_cmp_result = true;
   12229              :           else
   12230              :             {
   12231            0 :               cond_code = new_code;
   12232              :               /* Make sure we don't accidentally use the old condition.  */
   12233            0 :               cond_expr = NULL_TREE;
   12234              :             }
   12235              :         }
   12236              :       /* ???  The vectorized operand query below doesn't allow swapping
   12237              :          this way for SLP.  */
   12238            0 :       return false;
   12239              :       /* std::swap (then_clause, else_clause); */
   12240              :     }
   12241              : 
   12242        34876 :   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
   12243              :     {
   12244              :       /* Boolean values may have another representation in vectors
   12245              :          and therefore we prefer bit operations over comparison for
   12246              :          them (which also works for scalar masks).  We store opcodes
   12247              :          to use in bitop1 and bitop2.  Statement is vectorized as
   12248              :          BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   12249              :          depending on bitop1 and bitop2 arity.  */
   12250            0 :       switch (cond_code)
   12251              :         {
   12252              :         case GT_EXPR:
   12253              :           bitop1 = BIT_NOT_EXPR;
   12254              :           bitop2 = BIT_AND_EXPR;
   12255              :           break;
   12256            0 :         case GE_EXPR:
   12257            0 :           bitop1 = BIT_NOT_EXPR;
   12258            0 :           bitop2 = BIT_IOR_EXPR;
   12259            0 :           break;
   12260            0 :         case LT_EXPR:
   12261            0 :           bitop1 = BIT_NOT_EXPR;
   12262            0 :           bitop2 = BIT_AND_EXPR;
   12263            0 :           std::swap (cond_expr0, cond_expr1);
   12264            0 :           break;
   12265            0 :         case LE_EXPR:
   12266            0 :           bitop1 = BIT_NOT_EXPR;
   12267            0 :           bitop2 = BIT_IOR_EXPR;
   12268            0 :           std::swap (cond_expr0, cond_expr1);
   12269            0 :           break;
   12270            0 :         case NE_EXPR:
   12271            0 :           bitop1 = BIT_XOR_EXPR;
   12272            0 :           break;
   12273            0 :         case EQ_EXPR:
   12274            0 :           bitop1 = BIT_XOR_EXPR;
   12275            0 :           bitop2 = BIT_NOT_EXPR;
   12276            0 :           break;
   12277              :         default:
   12278              :           return false;
   12279              :         }
   12280              :       cond_code = SSA_NAME;
   12281              :     }
   12282              : 
   12283        34876 :   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
   12284            0 :       && reduction_type == EXTRACT_LAST_REDUCTION
   12285        34876 :       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
   12286              :     {
   12287            0 :       if (dump_enabled_p ())
   12288            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   12289              :                          "reduction comparison operation not supported.\n");
   12290            0 :       return false;
   12291              :     }
   12292              : 
   12293        34876 :   if (cost_vec)
   12294              :     {
   12295        26326 :       if (bitop1 != NOP_EXPR)
   12296              :         {
   12297            0 :           machine_mode mode = TYPE_MODE (comp_vectype);
   12298            0 :           optab optab;
   12299              : 
   12300            0 :           optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
   12301            0 :           if (!optab || !can_implement_p (optab, mode))
   12302            0 :             return false;
   12303              : 
   12304            0 :           if (bitop2 != NOP_EXPR)
   12305              :             {
   12306            0 :               optab = optab_for_tree_code (bitop2, comp_vectype,
   12307              :                                            optab_default);
   12308            0 :               if (!optab || !can_implement_p (optab, mode))
   12309            0 :                 return false;
   12310              :             }
   12311              :         }
   12312              : 
   12313        26326 :       vect_cost_for_stmt kind = vector_stmt;
   12314        26326 :       if (reduction_type == EXTRACT_LAST_REDUCTION)
   12315              :         /* Count one reduction-like operation per vector.  */
   12316              :         kind = vec_to_scalar;
   12317        26326 :       else if ((masked && !expand_vec_cond_expr_p (vectype, comp_vectype))
   12318        26326 :                || (!masked
   12319            0 :                    && (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
   12320              :                                                cond_code)
   12321            0 :                        || !expand_vec_cond_expr_p (vectype, vec_cmp_type))))
   12322            6 :         return false;
   12323              : 
   12324        26320 :       if (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
   12325              :                                              comp_vectype)
   12326        26320 :           || (op_adjust == 1
   12327            0 :               && !vect_maybe_update_slp_op_vectype
   12328            0 :                               (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
   12329        26320 :           || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
   12330        52640 :           || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))
   12331              :         {
   12332            0 :           if (dump_enabled_p ())
   12333            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   12334              :                              "incompatible vector types for invariants\n");
   12335            0 :           return false;
   12336              :         }
   12337              : 
   12338        26320 :       if (loop_vinfo && for_reduction
   12339          447 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
   12340              :         {
   12341           68 :           if (reduction_type == EXTRACT_LAST_REDUCTION)
   12342              :             {
   12343            0 :               if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
   12344              :                                                   vectype, OPTIMIZE_FOR_SPEED))
   12345            0 :                 vect_record_loop_len (loop_vinfo,
   12346              :                                       &LOOP_VINFO_LENS (loop_vinfo),
   12347              :                                       vec_num, vectype, 1);
   12348              :               else
   12349            0 :                 vect_record_loop_mask (loop_vinfo,
   12350              :                                        &LOOP_VINFO_MASKS (loop_vinfo),
   12351              :                                        vec_num, vectype, NULL);
   12352              :             }
   12353              :           /* Extra inactive lanes should be safe for vect_nested_cycle.  */
   12354           68 :           else if (!nested_cycle_p)
   12355              :             {
   12356           68 :               if (dump_enabled_p ())
   12357            8 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   12358              :                                  "conditional reduction prevents the use"
   12359              :                                  " of partial vectors.\n");
   12360           68 :               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
   12361              :             }
   12362              :         }
   12363              : 
   12364        26320 :       SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
   12365        26320 :       vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
   12366        26320 :       return true;
   12367              :     }
   12368              : 
   12369              :   /* Transform.  */
   12370              : 
   12371              :   /* Handle def.  */
   12372         8550 :   scalar_dest = gimple_assign_lhs (stmt);
   12373         8550 :   if (reduction_type != EXTRACT_LAST_REDUCTION)
   12374         8550 :     vec_dest = vect_create_destination_var (scalar_dest, vectype);
   12375              : 
   12376         8550 :   bool swap_cond_operands = false;
   12377              : 
   12378              :   /* See whether another part of the vectorized code applies a loop
   12379              :      mask to the condition, or to its inverse.  */
   12380              : 
   12381         8550 :   vec_loop_masks *masks = NULL;
   12382         8550 :   vec_loop_lens *lens = NULL;
   12383         8550 :   if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
   12384              :     {
   12385            0 :       if (reduction_type == EXTRACT_LAST_REDUCTION)
   12386            0 :         lens = &LOOP_VINFO_LENS (loop_vinfo);
   12387              :     }
   12388         8550 :   else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
   12389              :     {
   12390            3 :       if (reduction_type == EXTRACT_LAST_REDUCTION)
   12391            0 :         masks = &LOOP_VINFO_MASKS (loop_vinfo);
   12392              :       else
   12393              :         {
   12394            3 :           scalar_cond_masked_key cond (cond_expr, 1);
   12395            3 :           if (loop_vinfo->scalar_cond_masked_set.contains (cond))
   12396            0 :             masks = &LOOP_VINFO_MASKS (loop_vinfo);
   12397              :           else
   12398              :             {
   12399            3 :               bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
   12400            3 :               tree_code orig_code = cond.code;
   12401            3 :               cond.code = invert_tree_comparison (cond.code, honor_nans);
   12402            3 :               if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
   12403              :                 {
   12404            0 :                   masks = &LOOP_VINFO_MASKS (loop_vinfo);
   12405            0 :                   cond_code = cond.code;
   12406            0 :                   swap_cond_operands = true;
   12407              :                 }
   12408              :               else
   12409              :                 {
   12410              :                   /* Try the inverse of the current mask.  We check if the
   12411              :                      inverse mask is live and if so we generate a negate of
   12412              :                      the current mask such that we still honor NaNs.  */
   12413            3 :                   cond.inverted_p = true;
   12414            3 :                   cond.code = orig_code;
   12415            3 :                   if (loop_vinfo->scalar_cond_masked_set.contains (cond))
   12416              :                     {
   12417            0 :                       masks = &LOOP_VINFO_MASKS (loop_vinfo);
   12418            0 :                       cond_code = cond.code;
   12419            0 :                       swap_cond_operands = true;
   12420            0 :                       must_invert_cmp_result = true;
   12421              :                     }
   12422              :                 }
   12423              :             }
   12424              :         }
   12425              :     }
   12426              : 
   12427              :   /* Handle cond expr.  */
   12428         8550 :   if (masked)
   12429         8550 :     vect_get_vec_defs (vinfo, slp_node,
   12430              :                        cond_expr, &vec_oprnds0,
   12431              :                        then_clause, &vec_oprnds2,
   12432              :                        reduction_type != EXTRACT_LAST_REDUCTION
   12433              :                        ? else_clause : NULL, &vec_oprnds3);
   12434              :   else
   12435            0 :     vect_get_vec_defs (vinfo, slp_node,
   12436              :                        cond_expr0, &vec_oprnds0,
   12437              :                        cond_expr1, &vec_oprnds1,
   12438              :                        then_clause, &vec_oprnds2,
   12439              :                        reduction_type != EXTRACT_LAST_REDUCTION
   12440              :                        ? else_clause : NULL, &vec_oprnds3);
   12441              : 
   12442         8550 :   if (reduction_type == EXTRACT_LAST_REDUCTION)
   12443            0 :     vec_else_clause = else_clause;
   12444              : 
   12445              :   /* Arguments are ready.  Create the new vector stmt.  */
   12446        20098 :   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
   12447              :     {
   12448        11548 :       vec_then_clause = vec_oprnds2[i];
   12449        11548 :       if (reduction_type != EXTRACT_LAST_REDUCTION)
   12450        11548 :         vec_else_clause = vec_oprnds3[i];
   12451              : 
   12452        11548 :       if (swap_cond_operands)
   12453            0 :         std::swap (vec_then_clause, vec_else_clause);
   12454              : 
   12455        11548 :       if (masked)
   12456              :         vec_compare = vec_cond_lhs;
   12457              :       else
   12458              :         {
   12459            0 :           vec_cond_rhs = vec_oprnds1[i];
   12460            0 :           if (bitop1 == NOP_EXPR)
   12461              :             {
   12462            0 :               gimple_seq stmts = NULL;
   12463            0 :               vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
   12464              :                                            vec_cond_lhs, vec_cond_rhs);
   12465            0 :               gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
   12466              :             }
   12467              :           else
   12468              :             {
   12469            0 :               new_temp = make_ssa_name (vec_cmp_type);
   12470            0 :               gassign *new_stmt;
   12471            0 :               if (bitop1 == BIT_NOT_EXPR)
   12472            0 :                 new_stmt = gimple_build_assign (new_temp, bitop1,
   12473              :                                                 vec_cond_rhs);
   12474              :               else
   12475            0 :                 new_stmt
   12476            0 :                   = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
   12477              :                                          vec_cond_rhs);
   12478            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12479            0 :               if (bitop2 == NOP_EXPR)
   12480              :                 vec_compare = new_temp;
   12481            0 :               else if (bitop2 == BIT_NOT_EXPR
   12482            0 :                        && reduction_type != EXTRACT_LAST_REDUCTION)
   12483              :                 {
   12484              :                   /* Instead of doing ~x ? y : z do x ? z : y.  */
   12485              :                   vec_compare = new_temp;
   12486              :                   std::swap (vec_then_clause, vec_else_clause);
   12487              :                 }
   12488              :               else
   12489              :                 {
   12490            0 :                   vec_compare = make_ssa_name (vec_cmp_type);
   12491            0 :                   if (bitop2 == BIT_NOT_EXPR)
   12492            0 :                     new_stmt
   12493            0 :                       = gimple_build_assign (vec_compare, bitop2, new_temp);
   12494              :                   else
   12495            0 :                     new_stmt
   12496            0 :                       = gimple_build_assign (vec_compare, bitop2,
   12497              :                                              vec_cond_lhs, new_temp);
   12498            0 :                   vect_finish_stmt_generation (vinfo, stmt_info,
   12499              :                                                new_stmt, gsi);
   12500              :                 }
   12501              :             }
   12502              :         }
   12503              : 
   12504              :       /* If we decided to apply a loop mask to the result of the vector
   12505              :          comparison, AND the comparison with the mask now.  Later passes
   12506              :          should then be able to reuse the AND results between multiple
   12507              :          vector statements.
   12508              : 
   12509              :          For example:
   12510              :          for (int i = 0; i < 100; ++i)
   12511              :          x[i] = y[i] ? z[i] : 10;
   12512              : 
   12513              :          results in following optimized GIMPLE:
   12514              : 
   12515              :          mask__35.8_43 = vect__4.7_41 != { 0, ... };
   12516              :          vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
   12517              :          _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
   12518              :          vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
   12519              :          vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
   12520              :          vect_iftmp.11_47, { 10, ... }>;
   12521              : 
   12522              :          instead of using a masked and unmasked forms of
   12523              :          vec != { 0, ... } (masked in the MASK_LOAD,
   12524              :          unmasked in the VEC_COND_EXPR).  */
   12525              : 
   12526              :       /* Force vec_compare to be an SSA_NAME rather than a comparison,
   12527              :          in cases where that's necessary.  */
   12528              : 
   12529        11548 :       tree len = NULL_TREE, bias = NULL_TREE;
   12530        11548 :       if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
   12531              :         {
   12532            0 :           if (!is_gimple_val (vec_compare))
   12533              :             {
   12534            0 :               tree vec_compare_name = make_ssa_name (vec_cmp_type);
   12535            0 :               gassign *new_stmt = gimple_build_assign (vec_compare_name,
   12536              :                                                        vec_compare);
   12537            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12538            0 :               vec_compare = vec_compare_name;
   12539              :             }
   12540              : 
   12541            0 :           if (must_invert_cmp_result)
   12542              :             {
   12543            0 :               tree vec_compare_name = make_ssa_name (vec_cmp_type);
   12544            0 :               gassign *new_stmt = gimple_build_assign (vec_compare_name,
   12545              :                                                        BIT_NOT_EXPR,
   12546              :                                                        vec_compare);
   12547            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12548            0 :               vec_compare = vec_compare_name;
   12549              :             }
   12550              : 
   12551            0 :           if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
   12552              :                                               vectype, OPTIMIZE_FOR_SPEED))
   12553              :             {
   12554            0 :               if (lens)
   12555              :                 {
   12556              :                   /* ??? Do we really want the adjusted LEN here?  Isn't this
   12557              :                      based on number of elements?  */
   12558            0 :                   len = vect_get_loop_len (loop_vinfo, gsi, lens,
   12559              :                                            vec_num, vectype, i, 1, true);
   12560            0 :                   signed char biasval
   12561            0 :                     = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
   12562            0 :                   bias = build_int_cst (intQI_type_node, biasval);
   12563              :                 }
   12564              :               else
   12565              :                 {
   12566            0 :                   len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
   12567            0 :                   bias = build_int_cst (intQI_type_node, 0);
   12568              :                 }
   12569              :             }
   12570            0 :           if (masks)
   12571              :             {
   12572            0 :               tree loop_mask
   12573            0 :                 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
   12574              :                                       vectype, i);
   12575            0 :               tree tmp2 = make_ssa_name (vec_cmp_type);
   12576            0 :               gassign *g
   12577            0 :                 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
   12578              :                                        loop_mask);
   12579            0 :               vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
   12580            0 :               vec_compare = tmp2;
   12581              :             }
   12582              :         }
   12583              : 
   12584            0 :       gimple *new_stmt;
   12585            0 :       if (reduction_type == EXTRACT_LAST_REDUCTION)
   12586              :         {
   12587            0 :           gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
   12588            0 :           tree lhs = gimple_get_lhs (old_stmt);
   12589            0 :           if ((unsigned)i != vec_oprnds0.length () - 1)
   12590            0 :             lhs = copy_ssa_name (lhs);
   12591            0 :           if (len)
   12592            0 :             new_stmt = gimple_build_call_internal
   12593            0 :                 (IFN_LEN_FOLD_EXTRACT_LAST, 5, vec_else_clause, vec_compare,
   12594              :                  vec_then_clause, len, bias);
   12595              :           else
   12596            0 :             new_stmt = gimple_build_call_internal
   12597            0 :                 (IFN_FOLD_EXTRACT_LAST, 3, vec_else_clause, vec_compare,
   12598              :                  vec_then_clause);
   12599            0 :           gimple_call_set_lhs (new_stmt, lhs);
   12600            0 :           SSA_NAME_DEF_STMT (lhs) = new_stmt;
   12601            0 :           if ((unsigned)i != vec_oprnds0.length () - 1)
   12602              :             {
   12603            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12604            0 :               vec_else_clause = lhs;
   12605              :             }
   12606            0 :           else if (old_stmt == gsi_stmt (*gsi))
   12607            0 :             vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
   12608              :           else
   12609              :             {
   12610              :               /* In this case we're moving the definition to later in the
   12611              :                  block.  That doesn't matter because the only uses of the
   12612              :                  lhs are in phi statements.  */
   12613            0 :               gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
   12614            0 :               gsi_remove (&old_gsi, true);
   12615            0 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12616              :             }
   12617              :         }
   12618              :       else
   12619              :         {
   12620        11548 :           new_temp = make_ssa_name (vec_dest);
   12621        11548 :           new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
   12622              :                                           vec_then_clause, vec_else_clause);
   12623        11548 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12624              :         }
   12625        11548 :       slp_node->push_vec_def (new_stmt);
   12626              :     }
   12627              : 
   12628         8550 :   vec_oprnds0.release ();
   12629         8550 :   vec_oprnds1.release ();
   12630         8550 :   vec_oprnds2.release ();
   12631         8550 :   vec_oprnds3.release ();
   12632              : 
   12633         8550 :   return true;
   12634              : }
   12635              : 
   12636              : /* Helper of vectorizable_comparison.
   12637              : 
   12638              :    Check if STMT_INFO is a comparison with code CODE that can be vectorized
   12639              :    with boolean vector type VECTYPE as its result.  If COST_VEC is passed,
   12640              :    only analyze and record costs without emitting statements; otherwise emit
   12641              :    the vector comparison at GSI and push each definition onto SLP_NODE.
   12642              : 
   12643              :    Return true if STMT_INFO is vectorizable in this way.  */
   12644              : 
   12645              : static bool
   12646       352209 : vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
   12647              :                            stmt_vec_info stmt_info, tree_code code,
   12648              :                            gimple_stmt_iterator *gsi,
   12649              :                            slp_tree slp_node, stmt_vector_for_cost *cost_vec)
   12650              : {
   12651       352209 :   tree lhs, rhs1, rhs2;
   12652       352209 :   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
   12653       352209 :   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
   12654       352209 :   tree new_temp;
   12655       352209 :   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
   12656       352209 :   poly_uint64 nunits;
   12657       352209 :   enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
   12658       352209 :   int i;
   12659       352209 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   12660       352209 :   vec<tree> vec_oprnds0 = vNULL;
   12661       352209 :   vec<tree> vec_oprnds1 = vNULL;
   12662       352209 :   tree mask_type;
   12663       352209 :   tree mask = NULL_TREE;
   12664              : 
   12665       352209 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   12666              :     return false;
   12667              : 
   12668       352209 :   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
   12669              :     return false;
   12670              : 
   12671       159826 :   mask_type = vectype;
   12672       159826 :   nunits = TYPE_VECTOR_SUBPARTS (vectype);
   12673              : 
   12674       159826 :   if (TREE_CODE_CLASS (code) != tcc_comparison)
   12675              :     return false;
   12676              : 
   12677       158068 :   slp_tree slp_rhs1, slp_rhs2;
   12678       158068 :   if (!vect_is_simple_use (vinfo, slp_node,
   12679              :                            0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
   12680              :     return false;
   12681              : 
   12682       158068 :   if (!vect_is_simple_use (vinfo, slp_node,
   12683              :                            1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
   12684              :     return false;
   12685              : 
   12686       122812 :   if (vectype1 && vectype2
   12687       231035 :       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
   12688        72967 :                    TYPE_VECTOR_SUBPARTS (vectype2)))
   12689           16 :     return false;
   12690              : 
   12691       158052 :   vectype = vectype1 ? vectype1 : vectype2;
   12692              : 
   12693              :   /* Invariant comparison: neither operand supplied a vector type.  */
   12694       158052 :   if (!vectype)
   12695              :     {
   12696        30407 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
   12697        30407 :       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
   12698            7 :         return false;
   12699              :     }
   12700       127645 :   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
   12701              :     return false;
   12702              : 
   12703              :   /* Can't compare mask and non-mask types.  */
   12704       122796 :   if (vectype1 && vectype2
   12705       376354 :       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
   12706              :     return false;
   12707              : 
   12708              :   /* Boolean values may have another representation in vectors and
   12709              :      therefore we prefer bit operations over comparison for them (which
   12710              :      also works for scalar masks).  We store opcodes to use in bitop1 and
   12711              :      bitop2.  Statement is vectorized as BITOP2 (rhs1 BITOP1 rhs2) or
   12712              :      rhs1 BITOP2 (BITOP1 rhs2) depending on bitop1 and bitop2 arity.
   12713              :      SWAP_P records that LT_EXPR and LE_EXPR reuse the GT_EXPR and
   12714              :      GE_EXPR forms with the two operands exchanged.  */
   12715       158037 :   bool swap_p = false;
   12716       158037 :   if (VECTOR_BOOLEAN_TYPE_P (vectype))
   12717              :     {
   12718          654 :       if (code == GT_EXPR)
   12719              :         {
   12720              :           bitop1 = BIT_NOT_EXPR;
   12721              :           bitop2 = BIT_AND_EXPR;
   12722              :         }
   12723              :       else if (code == GE_EXPR)
   12724              :         {
   12725              :           bitop1 = BIT_NOT_EXPR;
   12726              :           bitop2 = BIT_IOR_EXPR;
   12727              :         }
   12728              :       else if (code == LT_EXPR)
   12729              :         {
   12730              :           bitop1 = BIT_NOT_EXPR;
   12731              :           bitop2 = BIT_AND_EXPR;
   12732              :           swap_p = true;
   12733              :         }
   12734              :       else if (code == LE_EXPR)
   12735              :         {
   12736              :           bitop1 = BIT_NOT_EXPR;
   12737              :           bitop2 = BIT_IOR_EXPR;
   12738              :           swap_p = true;
   12739              :         }
   12740              :       else
   12741              :         {
   12742              :           bitop1 = BIT_XOR_EXPR;
   12743              :           if (code == EQ_EXPR)
   12744              :             bitop2 = BIT_NOT_EXPR;
   12745              :         }
   12746              :     }
   12747              : 
   12748       158037 :   if (cost_vec)
   12749              :     {
   12750       145536 :       if (bitop1 == NOP_EXPR)
   12751              :         {
   12752       145014 :           if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
   12753              :             return false;
   12754              :         }
   12755              :       else
   12756              :         {
   12757          522 :           machine_mode mode = TYPE_MODE (vectype);
   12758          522 :           optab optab;
   12759              : 
   12760          522 :           optab = optab_for_tree_code (bitop1, vectype, optab_default);
   12761          522 :           if (!optab || !can_implement_p (optab, mode))
   12762            0 :             return false;
   12763              : 
   12764          522 :           if (bitop2 != NOP_EXPR)
   12765              :             {
   12766           91 :               optab = optab_for_tree_code (bitop2, vectype, optab_default);
   12767           91 :               if (!optab || !can_implement_p (optab, mode))
   12768            0 :                 return false;
   12769              :             }
   12770              :         }
   12771              : 
   12772              :       /* Put types on constant and invariant SLP children.  */
   12773       137571 :       if (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
   12774       137571 :           || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))
   12775              :         {
   12776            2 :           if (dump_enabled_p ())
   12777            2 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   12778              :                              "incompatible vector types for invariants\n");
   12779            2 :           return false;
   12780              :         }
   12781              : 
   12782       137569 :       vect_model_simple_cost (vinfo, 1 + (bitop2 != NOP_EXPR),
   12783              :                               slp_node, cost_vec);
   12784       137569 :       return true;
   12785              :     }
   12786              : 
   12787              :   /* Transform: COST_VEC is null, so emit the vector statements.  */
   12788              : 
   12789              :   /* Handle def: build the mask destination from the scalar lhs.  */
   12790        12501 :   lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
   12791        12501 :   if (lhs)
   12792        12501 :     mask = vect_create_destination_var (lhs, mask_type);
   12793              : 
   12794        12501 :   vect_get_vec_defs (vinfo, slp_node, rhs1, &vec_oprnds0, rhs2, &vec_oprnds1);
   12795        12501 :   if (swap_p)
   12796           58 :     std::swap (vec_oprnds0, vec_oprnds1);
   12797              : 
   12798              :   /* Arguments are ready.  Create the new vector stmt.  */
   12799        31506 :   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
   12800              :     {
   12801        19005 :       gimple *new_stmt;
   12802        19005 :       vec_rhs2 = vec_oprnds1[i];
   12803              : 
   12804        19005 :       if (lhs)
   12805        19005 :         new_temp = make_ssa_name (mask);
   12806              :       else
   12807            0 :         new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
   12808        19005 :       if (bitop1 == NOP_EXPR)
   12809              :         {
   12810        18863 :           new_stmt = gimple_build_assign (new_temp, code,
   12811              :                                           vec_rhs1, vec_rhs2);
   12812        18863 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12813              :         }
   12814              :       else
   12815              :         {
   12816          142 :           if (bitop1 == BIT_NOT_EXPR)
   12817           84 :             new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
   12818              :           else
   12819           58 :             new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
   12820              :                                             vec_rhs2);
   12821          142 :           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12822          142 :           if (bitop2 != NOP_EXPR)
   12823              :             {
   12824           84 :               tree res = make_ssa_name (mask);
   12825           84 :               if (bitop2 == BIT_NOT_EXPR)
   12826            0 :                 new_stmt = gimple_build_assign (res, bitop2, new_temp);
   12827              :               else
   12828           84 :                 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
   12829              :                                                 new_temp);
   12830           84 :               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
   12831              :             }
   12832              :         }
   12833        19005 :       slp_node->push_vec_def (new_stmt);
   12834              :     }
   12835              : 
   12836        12501 :   vec_oprnds0.release ();
   12837        12501 :   vec_oprnds1.release ();
   12838              : 
   12839        12501 :   return true;
   12840              : }
   12841              : 
   12842              : /* vectorizable_comparison.
   12843              : 
   12844              :    Check if STMT_INFO is an assignment whose rhs code is a comparison that
   12845              :    can be vectorized.  If COST_VEC is passed, calculate costs and mark
   12846              :    SLP_NODE as comparison_vec_info_type without emitting anything; otherwise
   12847              :    vectorize STMT_INFO by inserting a vectorized comparison at GSI.
   12848              : 
   12849              :    Return true if STMT_INFO is vectorizable in this way.  */
   12850              : 
   12851              : static bool
   12852       653914 : vectorizable_comparison (vec_info *vinfo,
   12853              :                          stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   12854              :                          slp_tree slp_node, stmt_vector_for_cost *cost_vec)
   12855              : {
   12856       653914 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   12857              : 
   12858       653914 :   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
   12859              :     return false;
   12860              : 
   12861       653914 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
   12862              :     return false;
   12863              : 
   12864       856053 :   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
   12865       349605 :   if (!stmt)
   12866              :     return false;
   12867              : 
   12868       349605 :   enum tree_code code = gimple_assign_rhs_code (stmt);
   12869       349605 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
   12870       349605 :   if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
   12871              :                                   slp_node, cost_vec))
   12872              :     return false;
   12873              : 
   12874       147466 :   if (cost_vec)
   12875       134965 :     SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
   12876              : 
   12877              :   return true;
   12878              : }
   12879              : 
   12880              : /* Check to see if the target supports any of the compare and branch optabs for
   12881              :    vectors with MODE as these would be required when expanding.  */
   12882              : static bool
   12883        61309 : supports_vector_compare_and_branch (loop_vec_info loop_vinfo, machine_mode mode)
   12884              : {
   12885        61309 :   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
   12886        61309 :   bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
   12887              : 
   12888              :   /* The vectorizer only produces vec_cbranch_any_optab directly.  So only
   12889              :      check for that, or for cond_vec_cbranch_any_optab when masked and
   12890              :      cond_len_vec_cbranch_any_optab when using lengths.  We can't produce
   12891              :      the conditional forms directly as we want to keep gimple_cond as the
   12892              :      GIMPLE representation, but we'll fold them in expand.  For that reason
   12893              :      a backend that supports a conditional cbranch optab must also support
   12894              :      the unconditional one, which the conditional one merely optimizes.  */
   12895        61309 :   if (masked_loop_p
   12896        61309 :       && direct_optab_handler (cond_vec_cbranch_any_optab, mode)
   12897              :                 != CODE_FOR_nothing)
   12898              :     return true;
   12899        61309 :   else if (len_loop_p
   12900        61309 :            && direct_optab_handler (cond_len_vec_cbranch_any_optab, mode)
   12901              :                 != CODE_FOR_nothing)
   12902              :     return true;
   12903        61309 :   else if (!masked_loop_p && !len_loop_p
   12904       122618 :            && direct_optab_handler (vec_cbranch_any_optab, mode)
   12905              :                 != CODE_FOR_nothing)
   12906              :     return true;
   12907              : 
   12908              :   /* The target can implement cbranch to distinguish between boolean vector
   12909              :      types and data types if they don't have a different mode for both.  */
   12910        61309 :   return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing;
   12911              : }
   12912              : 
   12913              : /* Determine the type to use for early break vectorization's scalar IV and
   12914              :    store it in LOOP_VINFO_EARLY_BRK_IV_TYPE.  Return false if none exists.  */
   12915              : 
   12916              : static bool
   12917         2604 : vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo)
   12918              : {
   12919              :   /* Default to sizetype; kept when LOOP_VINFO_NITERS_UNCOUNTED_P holds.  */
   12920         2604 :   tree iters_vf_type = sizetype;
   12921         2604 :   if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
   12922              :     {
   12923              :       /* Find the type with the minimum precision we can use
   12924              :          for the scalar IV.  */
   12925         2381 :       tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
   12926              : 
   12927              :       /* Work out how many bits we need to represent the limit.  */
   12928         2381 :       unsigned int min_ni_width
   12929         2381 :         = vect_min_prec_for_max_niters (loop_vinfo, 1);
   12930              : 
   12931              :       /* Check if we're using PFA, if so we need a signed IV and an
   12932              :          extra bit for the sign.  */
   12933         2381 :       if (TYPE_UNSIGNED (cand_type)
   12934         2381 :           && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
   12935         3925 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
   12936          156 :         min_ni_width += 1;
   12937              : 
   12938         2381 :       if (TYPE_PRECISION (cand_type) >= min_ni_width)
   12939         2308 :         iters_vf_type = unsigned_type_for (cand_type);
   12940              :       else
   12941              :         {
   12942           73 :           opt_scalar_int_mode cmp_mode_iter;
   12943           73 :           tree iv_type = NULL_TREE;
   12944          357 :           FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
   12945              :             {
   12946          357 :               auto cmp_mode = cmp_mode_iter.require ();
   12947          357 :               unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
   12948          357 :               if (cmp_bits >= min_ni_width
   12949          357 :                   && targetm.scalar_mode_supported_p (cmp_mode))
   12950              :                 {
   12951           73 :                   iv_type = build_nonstandard_integer_type (cmp_bits, true);
   12952           73 :                   if (iv_type)
   12953              :                     break;
   12954              :                 }
   12955              :             }
   12956              : 
   12957           73 :           if (!iv_type)
   12958              :             {
   12959            0 :               if (dump_enabled_p ())
   12960            0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   12961              :                        "can't vectorize early exit because the "
   12962              :                        "target doesn't support a scalar type "
   12963              :                        "wide enough to hold niters.\n");
   12964            0 :               return false;
   12965              :             }
   12966           73 :           iters_vf_type = iv_type;
   12967              :         }
   12968              :     }
   12969              : 
   12970         2604 :   LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type;
   12971         2604 :   return true;
   12972              : }
   12973              : 
   12974              : /* Check to see if the current early break given in STMT_INFO is valid for
   12975              :    vectorization.  */
   12976              : 
   12977              : bool
   12978       243630 : vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
   12979              :                          gimple_stmt_iterator *gsi,
   12980              :                          slp_tree slp_node, stmt_vector_for_cost *cost_vec)
   12981              : {
   12982       243630 :   if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
   12983              :     return false;
   12984              : 
   12985        62879 :   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
   12986              :     return false;
   12987              : 
   12988        62879 :   if (!STMT_VINFO_RELEVANT_P (stmt_info))
   12989              :     return false;
   12990              : 
   12991        62879 :   DUMP_VECT_SCOPE ("vectorizable_early_exit");
   12992              : 
   12993        62879 :   auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
   12994              : 
   12995              :   /* For SLP we don't want to use the type of the operands of the SLP node, when
   12996              :      vectorizing using SLP slp_node will be the children of the gcond and we
   12997              :      want to use the type of the direct children which since the gcond is root
   12998              :      will be the current node, rather than a child node as vect_is_simple_use
   12999              :      assumes.  */
   13000        62879 :   tree vectype = SLP_TREE_VECTYPE (slp_node);
   13001        62879 :   if (!vectype)
   13002              :     return false;
   13003              : 
   13004        62879 :   machine_mode mode = TYPE_MODE (vectype);
   13005        62879 :   int vec_num = vect_get_num_copies (loop_vinfo, slp_node);
   13006              : 
   13007        62879 :   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   13008        62879 :   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   13009        62879 :   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
   13010        62879 :   bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
   13011              : 
   13012              :   /* Now build the new conditional.  Pattern gimple_conds get dropped during
   13013              :      codegen so we must replace the original insn.  */
   13014        62879 :   gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
   13015        62879 :   gcond *cond_stmt = as_a <gcond *>(orig_stmt);
   13016              : 
   13017        62879 :   tree vectype_out = vectype;
   13018        62879 :   auto bb = gimple_bb (cond_stmt);
   13019        62879 :   edge exit_true_edge = EDGE_SUCC (bb, 0);
   13020        62879 :   if (exit_true_edge->flags & EDGE_FALSE_VALUE)
   13021          660 :     exit_true_edge = EDGE_SUCC (bb, 1);
   13022        62879 :   gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
   13023              : 
   13024              :   /* When vectorizing we assume that if the branch edge is taken that we're
   13025              :      exiting the loop.  This is not however always the case as the compiler will
   13026              :      rewrite conditions to always be a comparison against 0.  To do this it
   13027              :      sometimes flips the edges.  This is fine for scalar,  but for vector we
   13028              :      then have to negate the result of the test, as we're still assuming that if
   13029              :      you take the branch edge that we found the exit condition.  i.e. we need to
   13030              :      know whether we are generating a `forall` or an `exist` condition.  */
   13031       125758 :   bool flipped = flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
   13032        62879 :                                         exit_true_edge->dest);
   13033              : 
   13034              :   /* See if we support ADDHN and use that for the reduction.  */
   13035        62879 :   internal_fn ifn = IFN_VEC_TRUNC_ADD_HIGH;
   13036        62879 :   bool addhn_supported_p
   13037        62879 :     = direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_BOTH);
   13038        62879 :   tree narrow_type = NULL_TREE;
   13039        62879 :   if (addhn_supported_p)
   13040              :     {
   13041              :       /* Calculate the narrowing type for the result.  */
   13042            0 :       auto halfprec = TYPE_PRECISION (TREE_TYPE (vectype)) / 2;
   13043            0 :       auto unsignedp = TYPE_UNSIGNED (TREE_TYPE (vectype));
   13044            0 :       tree itype = build_nonstandard_integer_type (halfprec, unsignedp);
   13045            0 :       tree tmp_type = build_vector_type (itype, TYPE_VECTOR_SUBPARTS (vectype));
   13046            0 :       narrow_type = truth_type_for (tmp_type);
   13047              : 
   13048            0 :       if (!supports_vector_compare_and_branch (loop_vinfo,
   13049            0 :                                                TYPE_MODE (narrow_type)))
   13050              :         {
   13051            0 :           if (dump_enabled_p ())
   13052            0 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   13053              :                                "can't use ADDHN reduction because cbranch for "
   13054              :                                "the narrowed type is not supported by the "
   13055              :                                "target.\n");
   13056              :           addhn_supported_p = false;
   13057              :         }
   13058              :     }
   13059              : 
   13060              :   /* Analyze only.  */
   13061        62879 :   if (cost_vec)
   13062              :     {
   13063        61309 :       if (!addhn_supported_p
   13064        61309 :           && !supports_vector_compare_and_branch (loop_vinfo, mode))
   13065              :         {
   13066        58705 :           if (dump_enabled_p ())
   13067          597 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   13068              :                                "can't vectorize early exit because the "
   13069              :                                "target doesn't support flag setting vector "
   13070              :                                "comparisons.\n");
   13071        58705 :           return false;
   13072              :         }
   13073              : 
   13074         2604 :       if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi,
   13075              :                                       slp_node, cost_vec))
   13076              :         return false;
   13077              : 
   13078         2604 :       if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
   13079              :         {
   13080         1544 :           if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
   13081              :                                               OPTIMIZE_FOR_SPEED))
   13082            0 :             vect_record_loop_len (loop_vinfo, lens, vec_num, vectype, 1);
   13083              :           else
   13084         1544 :             vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
   13085              :         }
   13086              : 
   13087         2604 :       if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo))
   13088              :         return false;
   13089              : 
   13090              :       return true;
   13091              :     }
   13092              : 
   13093              :   /* Transform.  */
   13094              : 
   13095         1570 :   tree new_temp = NULL_TREE;
   13096         1570 :   gimple *new_stmt = NULL;
   13097              : 
   13098         1570 :   if (dump_enabled_p ())
   13099          405 :     dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
   13100              : 
    13101              :   /* For SLP we don't do codegen of the body starting from the gcond; the gconds
    13102              :      are roots and so by the time we get to them we have already codegened the SLP
    13103              :      tree and so we shouldn't try to do so again.  The arguments have already been
    13104              :      vectorized.  It's not very clean to do this here, but the masking code below is
    13105              :      complex and this keeps it all in one place to ease fixes and backports.  Once we
    13106              :      drop the non-SLP loop vect or split vectorizable_* this can be simplified.  */
   13107              : 
   13108         1570 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
   13109         1570 :   basic_block cond_bb = gimple_bb (stmt);
   13110         1570 :   gimple_stmt_iterator  cond_gsi = gsi_last_bb (cond_bb);
   13111              : 
   13112         1570 :   auto_vec<tree> stmts;
   13113         1570 :   stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
   13114              : 
   13115              :   /* If we're comparing against a previous forall we need to negate the results
   13116              :      before we do the final comparison or reduction.  */
   13117         1570 :   if (flipped)
   13118              :     {
   13119              :       /* Rewrite the if(all(mask)) into if (!all(mask)) which is the same as
   13120              :          if (any(~mask)) by negating the masks and flipping the branches.
   13121              : 
   13122              :         1. For unmasked loops we simply reduce the ~mask.
   13123              :         2. For masked loops we reduce (~mask & loop_mask) which is the same as
   13124              :            doing (mask & loop_mask) ^ loop_mask.  */
   13125          294 :       for (unsigned i = 0; i < stmts.length (); i++)
   13126              :         {
   13127          173 :           tree inv_lhs = make_temp_ssa_name (vectype, NULL, "vexit_inv");
   13128          173 :           auto inv_stmt = gimple_build_assign (inv_lhs, BIT_NOT_EXPR, stmts[i]);
   13129          173 :           vect_finish_stmt_generation (loop_vinfo, stmt_info, inv_stmt,
   13130              :                                        &cond_gsi);
   13131          173 :           stmts[i] = inv_lhs;
   13132              :         }
   13133              : 
   13134          121 :       EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
   13135          121 :       EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
   13136              :     }
   13137              : 
   13138              :   /* Determine if we need to reduce the final value.  */
   13139         1570 :   if (stmts.length () > 1)
   13140              :     {
   13141              :       /* We build the reductions in a way to maintain as much parallelism as
   13142              :          possible.  */
   13143          141 :       auto_vec<tree> workset (stmts.length ());
   13144              : 
    13145              :       /* Mask the statements as we queue them up.  Normally we loop over
    13146              :          vec_num, but since we inspect the exact results of vectorization
    13147              :          we don't need to and instead can just use the stmts themselves.  */
   13148          141 :       if (masked_loop_p)
   13149            0 :         for (unsigned i = 0; i < stmts.length (); i++)
   13150              :           {
   13151            0 :             tree stmt_mask
   13152            0 :               = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
   13153              :                                     vectype, i);
   13154            0 :             stmt_mask
   13155            0 :               = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
   13156            0 :                                   stmts[i], &cond_gsi);
   13157            0 :             workset.quick_push (stmt_mask);
   13158              :           }
   13159          141 :       else if (len_loop_p)
   13160            0 :         for (unsigned i = 0; i < stmts.length (); i++)
   13161              :           {
   13162            0 :             tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
   13163              :                                                     lens, vec_num,
   13164            0 :                                                     vectype, stmts[i], i, 1);
   13165              : 
   13166            0 :             workset.quick_push (len_mask);
   13167              :           }
   13168              :       else
   13169          141 :         workset.splice (stmts);
   13170              : 
   13171          430 :       while (workset.length () > 1)
   13172              :         {
   13173          289 :           tree arg0 = workset.pop ();
   13174          289 :           tree arg1 = workset.pop ();
   13175          289 :           if (addhn_supported_p && workset.length () == 0)
   13176              :             {
   13177            0 :               new_stmt = gimple_build_call_internal (ifn, 2, arg0, arg1);
   13178            0 :               vectype_out = narrow_type;
   13179            0 :               new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
   13180            0 :               gimple_call_set_lhs (as_a <gcall *> (new_stmt), new_temp);
   13181            0 :               gimple_call_set_nothrow (as_a <gcall *> (new_stmt), true);
   13182              :             }
   13183              :           else
   13184              :             {
   13185          289 :               new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
   13186          289 :               new_stmt
   13187          289 :                 = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
   13188              :             }
   13189          289 :           vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
   13190              :                                        &cond_gsi);
   13191          289 :           workset.quick_insert (0, new_temp);
   13192              :         }
   13193          141 :     }
   13194              :   else
   13195              :     {
   13196         1429 :       new_temp = stmts[0];
   13197         1429 :       if (masked_loop_p)
   13198              :         {
   13199            0 :           tree mask
   13200            0 :             = vect_get_loop_mask (loop_vinfo, gsi, masks, 1, vectype, 0);
   13201            0 :           new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
   13202              :                                        new_temp, &cond_gsi);
   13203              :         }
   13204         1429 :       else if (len_loop_p)
   13205            0 :         new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
   13206              :                                            1, vectype, new_temp, 0, 1);
   13207              :     }
   13208              : 
   13209         1570 :   gcc_assert (new_temp);
   13210              : 
   13211         1570 :   tree cst = build_zero_cst (vectype_out);
   13212         1570 :   gimple_cond_set_condition (cond_stmt, NE_EXPR, new_temp, cst);
   13213         1570 :   update_stmt (orig_stmt);
   13214              : 
   13215              :   /* ??? */
   13216         1570 :   SLP_TREE_VEC_DEFS (slp_node).truncate (0);
   13217              : 
   13218         1570 :   return true;
   13219         1570 : }
   13220              : 
    13221              : /* Return true if vectorizable_live_operation can handle every live
    13222              :    scalar stmt of SLP_NODE; null entries and stmts that are not
    13223              :    STMT_VINFO_LIVE_P are skipped.  SLP_NODE_INSTANCE, VEC_STMT_P and
    13224              :    COST_VEC are passed through to vectorizable_live_operation.  */
    13225              : 
    13226              : static bool
    13227      1292265 : can_vectorize_live_stmts (vec_info *vinfo,
    13228              :                           slp_tree slp_node, slp_instance slp_node_instance,
    13229              :                           bool vec_stmt_p,
    13230              :                           stmt_vector_for_cost *cost_vec)
    13231              : {
    13232      1292265 :   stmt_vec_info slp_stmt_info;
    13233      1292265 :   unsigned int i;
    13234      2725559 :   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
    13235              :     {
    13236      1433294 :       if (slp_stmt_info
    13237      1417200 :           && STMT_VINFO_LIVE_P (slp_stmt_info)
    13238      1568414 :           && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
    13239              :                                            slp_node_instance, i,
    13240              :                                            vec_stmt_p, cost_vec))
    13241              :         return false;
    13242              :     }
    13243              : 
    13244              :   return true;
    13245              : }
   13246              : 
    13247              : /* Make sure the representative stmt of SLP node NODE is vectorizable.  */
    13248              : 
    13249              : opt_result
    13250      2663522 : vect_analyze_stmt (vec_info *vinfo,
    13251              :                    slp_tree node, slp_instance node_instance,
    13252              :                    stmt_vector_for_cost *cost_vec)
    13253              : {
    13254      2663522 :   stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
    13255      2663522 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    13256      2663522 :   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
    13257      2663522 :   bool ok;
    13258              : 
    13259      2663522 :   if (dump_enabled_p ())
    13260       100596 :     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
    13261              :                      stmt_info->stmt);
    13262              : 
    13263      5041511 :   if (gimple_has_volatile_ops (stmt_info->stmt))
    13264              :     {
    13265              :       /* ???  This shouldn't really happen, volatile stmts should
    13266              :          not end up in the SLP graph.  */
    13267            0 :       return opt_result::failure_at (stmt_info->stmt,
    13268              :                                      "not vectorized:"
    13269              :                                      " stmt has volatile operands: %G\n",
    13270              :                                      stmt_info->stmt);
    13271              :     }
    13272              : 
    13273              :   /* Reject stmts that are neither relevant nor live.  */
    13274      2663522 :   if (!STMT_VINFO_RELEVANT_P (stmt_info)
    13275            0 :       && !STMT_VINFO_LIVE_P (stmt_info))
    13276              :     {
    13277            0 :       if (dump_enabled_p ())
    13278            0 :         dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
    13279              : 
    13280              :       /* ???  This shouldn't really happen, irrelevant stmts should
    13281              :          not end up in the SLP graph.  */
    13282            0 :       return opt_result::failure_at (stmt_info->stmt,
    13283              :                                      "not vectorized:"
    13284              :                                      " irrelevant stmt as SLP node %p "
    13285              :                                      "representative.\n",
    13286              :                                      (void *)node);
    13287              :     }
    13288              : 
    13289      2663522 :   switch (STMT_VINFO_DEF_TYPE (stmt_info))
    13290              :     {
    13291              :       case vect_internal_def:
    13292              :       case vect_condition_def:
    13293              :         break;
    13294              : 
    13295        84163 :       case vect_reduction_def:
    13296        84163 :       case vect_nested_cycle:
    13297        84163 :          gcc_assert (!bb_vinfo
    13298              :                      && (relevance == vect_used_in_outer
    13299              :                          || relevance == vect_used_in_outer_by_reduction
    13300              :                          || relevance == vect_used_by_reduction
    13301              :                          || relevance == vect_unused_in_scope
    13302              :                          || relevance == vect_used_only_live));
    13303              :          break;
    13304              : 
    13305          322 :       case vect_double_reduction_def:
    13306          322 :         gcc_assert (!bb_vinfo && node);
    13307              :         break;
    13308              : 
    13309       150192 :       case vect_induction_def:
    13310       150192 :       case vect_first_order_recurrence:
    13311       150192 :         gcc_assert (!bb_vinfo);
    13312              :         break;
    13313              : 
    13314            0 :       case vect_constant_def:
    13315            0 :       case vect_external_def:
    13316            0 :       case vect_unknown_def_type:
    13317            0 :       default:
    13318            0 :         gcc_unreachable ();
    13319              :     }
    13320              : 
    13321      2663522 :   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
    13322      2663522 :   STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
    13323              : 
    13324      2663522 :   if (STMT_VINFO_RELEVANT_P (stmt_info))
    13325              :     {
    13326      2663522 :       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    13327      2663522 :       gcc_assert (SLP_TREE_VECTYPE (node)
    13328              :                   || gimple_code (stmt_info->stmt) == GIMPLE_COND
    13329              :                   || (call && gimple_call_lhs (call) == NULL_TREE));
    13330              :     }
    13331              : 
    13332      2663522 :   ok = true;
    13333      2663522 :   if (bb_vinfo
    13334      1473016 :       || (STMT_VINFO_RELEVANT_P (stmt_info)
    13335            0 :           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    13336              :     /* Prefer vectorizable_call over vectorizable_simd_clone_call so
    13337              :        -mveclibabi= takes preference over library functions with
    13338              :        the simd attribute.  */
    13339      2663522 :     ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec)
    13340      2656638 :           || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node,
    13341              :                                            cost_vec)
    13342      2656171 :           || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec)
    13343      2572838 :           || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec)
    13344      2036019 :           || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec)
    13345      1966846 :           || vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec)
    13346      1532013 :           || vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec)
    13347       717488 :           || vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec)
    13348       667733 :           || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec)
    13349       641413 :           || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec)
    13350       506448 :           || (bb_vinfo
    13351       124242 :               && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec))
    13352      3112047 :           || (is_a <loop_vec_info> (vinfo)
    13353       382206 :               && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
    13354              :                                               stmt_info, node, cost_vec)
    13355       381490 :                   || vectorizable_reduction (as_a <loop_vec_info> (vinfo),
    13356              :                                              stmt_info,
    13357              :                                              node, node_instance, cost_vec)
    13358       299868 :                   || vectorizable_induction (as_a <loop_vec_info> (vinfo),
    13359              :                                              stmt_info, node, cost_vec)
    13360       181833 :                   || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
    13361              :                                           stmt_info, node)
    13362       181012 :                   || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
    13363              :                                           stmt_info, node, cost_vec)
    13364       180751 :                   || vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
    13365              :                                               stmt_info, NULL, node,
    13366              :                                               cost_vec))));
    13367              : 
    13368      2663522 :   STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
    13369              : 
    13370      2416452 :   if (!ok)
    13371       247070 :     return opt_result::failure_at (stmt_info->stmt,
    13372              :                                    "not vectorized:"
    13373              :                                    " relevant stmt not supported: %G",
    13374              :                                    stmt_info->stmt);
    13375              : 
    13376              :   /* For loop vectorization, stmts that are (also) "live" (i.e. used out of
    13377              :      the loop) need extra handling; some lc-phi and ldst_lanes nodes are exempt.  */
    13378      2416452 :   if (!bb_vinfo
    13379      1292265 :       && (SLP_TREE_TYPE (node) != lc_phi_info_type
    13380          821 :           || SLP_TREE_DEF_TYPE (node) == vect_internal_def)
    13381      1292265 :       && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node))
    13382      3708717 :       && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
    13383              :                                     node, node_instance,
    13384              :                                     false, cost_vec))
    13385            0 :     return opt_result::failure_at (stmt_info->stmt,
    13386              :                                    "not vectorized:"
    13387              :                                    " live stmt not supported: %G",
    13388              :                                    stmt_info->stmt);
    13389              : 
    13390      2416452 :   return opt_result::success ();
    13391              : }
   13392              : 
   13393              : 
    13394              : /* Function vect_transform_stmt.  Create a vectorized stmt to replace STMT_INFO
    13395              :    and insert it at GSI, dispatching on SLP_TREE_TYPE (SLP_NODE); then handle
    13396              :    SLP_NODE's live lanes.  Return true iff SLP_NODE was transformed as a store.  */
    13397              : 
    13398              : bool
    13399       973797 : vect_transform_stmt (vec_info *vinfo,
    13400              :                      stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    13401              :                      slp_tree slp_node, slp_instance slp_node_instance)
    13402              : {
    13403       973797 :   bool is_store = false;
    13404       973797 :   bool done;
    13405              : 
    13406       973797 :   gcc_assert (slp_node);
    13407              : 
    13408       973797 :   if (stmt_info)
    13409       972960 :     STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
    13410              : 
    13411       973797 :   switch (SLP_TREE_TYPE (slp_node))
    13412              :     {
    13413        22892 :     case type_demotion_vec_info_type:
    13414        22892 :     case type_promotion_vec_info_type:
    13415        22892 :     case type_conversion_vec_info_type:
    13416        22892 :       done = vectorizable_conversion (vinfo, stmt_info, gsi, slp_node, NULL);
    13417        22892 :       gcc_assert (done);
    13418              :       break;
    13419              : 
    13420        16311 :     case induc_vec_info_type:
    13421        16311 :       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
    13422              :                                      stmt_info, slp_node, NULL);
    13423        16311 :       gcc_assert (done);
    13424              :       break;
    13425              : 
    13426         8560 :     case shift_vec_info_type:
    13427         8560 :       done = vectorizable_shift (vinfo, stmt_info, gsi, slp_node, NULL);
    13428         8560 :       gcc_assert (done);
    13429              :       break;
    13430              : 
    13431       114588 :     case op_vec_info_type:
    13432       114588 :       done = vectorizable_operation (vinfo, stmt_info, gsi, slp_node, NULL);
    13433       114588 :       gcc_assert (done);
    13434              :       break;
    13435              : 
    13436        16024 :     case assignment_vec_info_type:
    13437        16024 :       done = vectorizable_assignment (vinfo, stmt_info, gsi, slp_node, NULL);
    13438        16024 :       gcc_assert (done);
    13439              :       break;
    13440              : 
    13441       166677 :     case load_vec_info_type:
    13442       166677 :       done = vectorizable_load (vinfo, stmt_info, gsi, slp_node, NULL);
    13443       166677 :       gcc_assert (done);
    13444              :       break;
    13445              : 
    13446       545722 :     case store_vec_info_type:
    13447       545722 :       done = vectorizable_store (vinfo, stmt_info, gsi, slp_node, NULL);
    13448       545722 :       gcc_assert (done);
    13449              :       is_store = true;
    13450              :       break;
    13451              : 
    13452         8550 :     case condition_vec_info_type:
    13453         8550 :       done = vectorizable_condition (vinfo, stmt_info, gsi, slp_node, NULL);
    13454         8550 :       gcc_assert (done);
    13455              :       break;
    13456              : 
    13457        12501 :     case comparison_vec_info_type:
    13458        12501 :       done = vectorizable_comparison (vinfo, stmt_info, gsi, slp_node, NULL);
    13459        12501 :       gcc_assert (done);
    13460              :       break;
    13461              : 
    13462         4193 :     case call_vec_info_type:
    13463         4193 :       done = vectorizable_call (vinfo, stmt_info, gsi, slp_node, NULL);
    13464         4193 :       break;
    13465              : 
    13466          362 :     case call_simd_clone_vec_info_type:
    13467          362 :       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi,
    13468              :                                            slp_node, NULL);
    13469          362 :       break;
    13470              : 
    13471         2636 :     case reduc_vec_info_type:
    13472         2636 :       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
    13473              :                                        gsi, slp_node);
    13474         2636 :       gcc_assert (done);
    13475              :       break;
    13476              : 
    13477        23727 :     case cycle_phi_info_type:
    13478        23727 :       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
    13479              :                                        slp_node, slp_node_instance);
    13480        23727 :       gcc_assert (done);
    13481              :       break;
    13482              : 
    13483          530 :     case lc_phi_info_type:
    13484          530 :       done = vect_transform_lc_phi (as_a <loop_vec_info> (vinfo),
    13485              :                                     stmt_info, slp_node);
    13486          530 :       gcc_assert (done);
    13487              :       break;
    13488              : 
    13489           45 :     case recurr_info_type:
    13490           45 :       done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
    13491              :                                   stmt_info, slp_node, NULL);
    13492           45 :       gcc_assert (done);
    13493              :       break;
    13494              : 
    13495        14146 :     case phi_info_type:
    13496        14146 :       done = vectorizable_phi (as_a <bb_vec_info> (vinfo),
    13497              :                                stmt_info, slp_node, NULL);
    13498        14146 :       gcc_assert (done);
    13499              :       break;
    13500              : 
    13501            0 :     case loop_exit_ctrl_vec_info_type:
    13502            0 :       done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
    13503              :                                       stmt_info, gsi, slp_node, NULL);
    13504            0 :       gcc_assert (done);
    13505              :       break;
    13506              : 
    13507        16333 :     case permute_info_type:
    13508        16333 :       done = vectorizable_slp_permutation (vinfo, gsi, slp_node, NULL);
    13509        16333 :       gcc_assert (done);
    13510              :       break;
    13511              : 
    13512            0 :     default:
    13513            0 :       if (!STMT_VINFO_LIVE_P (stmt_info))
    13514              :         {
    13515            0 :           if (dump_enabled_p ())
    13516            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    13517              :                              "stmt not supported.\n");
    13518            0 :           gcc_unreachable ();
    13519              :         }
    13520       973797 :       done = true;
    13521              :     }
    13522              : 
    13523       973797 :   if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
    13524       428075 :       && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node)))
    13525              :     {
    13526              :       /* Handle stmts whose DEF is used outside the loop-nest that is
    13527              :          being vectorized.  */
    13528       577937 :       for (unsigned lane : SLP_TREE_LIVE_LANES (slp_node))
    13529              :         {
    13530        61744 :           stmt_vec_info slp_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[lane];
    13531        61744 :           done = vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
    13532              :                                               slp_node_instance, lane,
    13533              :                                               true, NULL);
    13534        61744 :           gcc_assert (done);
    13535              :         }
    13536              :     }
    13537              : 
    13538       973797 :   return is_store;
    13539              : }
   13540              : 
   13541              : 
   13542              : /* Remove the group of stores that starts at FIRST_STMT_INFO (for SLP or
   13543              :    interleaving) and free their stmt_vec_info.  */
   13544              : 
   13545              : void
   13546            0 : vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
   13547              : {
   13548            0 :   stmt_vec_info next_stmt_info = first_stmt_info;
   13549              : 
   13550            0 :   while (next_stmt_info)
   13551              :     {
   13552            0 :       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
   13553            0 :       next_stmt_info = vect_orig_stmt (next_stmt_info);
   13554              :       /* Free the attached stmt_vec_info and remove the stmt; TMP already holds the next group element.  */
   13555            0 :       vinfo->remove_stmt (next_stmt_info);
   13556            0 :       next_stmt_info = tmp;
   13557              :     }
   13558            0 : }
   13559              : 
   13560              : /* If NUNITS is nonzero, return a vector type that contains NUNITS
   13561              :    elements of type SCALAR_TYPE, or null if the target doesn't support
   13562              :    such a type.
   13563              : 
   13564              :    If NUNITS is zero, return a vector type that contains elements of
   13565              :    type SCALAR_TYPE, choosing whichever vector size the target prefers.
   13566              : 
   13567              :    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   13568              :    for this vectorization region and want to "autodetect" the best choice.
   13569              :    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   13570              :    and we want the new type to be interoperable with it.  PREVAILING_MODE
   13571              :    in this case can be a scalar integer mode or a vector mode; when it
   13572              :    is a vector mode, the function acts like a tree-level version of
   13573              :    related_vector_mode.  */
   13574              : 
   13575              : tree
   13576     30868752 : get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
   13577              :                                      tree scalar_type, poly_uint64 nunits)
   13578              : {
   13579     30868752 :   tree orig_scalar_type = scalar_type;
   13580     30868752 :   scalar_mode inner_mode;
   13581     30868752 :   machine_mode simd_mode;
   13582     30868752 :   tree vectype;
   13583              : 
   13584     30868752 :   if ((!INTEGRAL_TYPE_P (scalar_type)
   13585     10449276 :        && !POINTER_TYPE_P (scalar_type)
   13586      1792826 :        && !SCALAR_FLOAT_TYPE_P (scalar_type))
   13587     40813796 :       || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
   13588      1288676 :           && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
   13589       507446 :     return NULL_TREE;
   13590              : 
   13591     30361306 :   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
   13592              : 
   13593              :   /* Interoperability between modes requires one to be a constant multiple
   13594              :      of the other, so that the number of vectors required for each operation
   13595              :      is a compile-time constant.  */
   13596     30361306 :   if (prevailing_mode != VOIDmode
   13597     29239369 :       && !constant_multiple_p (nunits * nbytes,
   13598     29239369 :                                GET_MODE_SIZE (prevailing_mode))
   13599     31840275 :       && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
   13600      1478969 :                                nunits * nbytes))
   13601              :     return NULL_TREE;
   13602              : 
   13603              :   /* For vector types of elements whose mode precision doesn't
   13604              :      match their type's precision we use an element type of mode
   13605              :      precision.  The vectorization routines will have to make sure
   13606              :      they support the proper result truncation/extension.
   13607              :      We also make sure to build vector types with INTEGER_TYPE
   13608              :      component type only.  */
   13609     30361306 :   if (INTEGRAL_TYPE_P (scalar_type)
   13610     50780700 :       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
   13611     18940967 :           || TREE_CODE (scalar_type) != INTEGER_TYPE))
   13612      1687312 :     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
   13613      1687312 :                                                   TYPE_UNSIGNED (scalar_type));
   13614              : 
   13615              :   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
   13616              :      When the component mode passes the above test simply use a type
   13617              :      corresponding to that mode.  The theory is that any use that
   13618              :      would cause problems with this will disable vectorization anyway.  */
   13619     28673994 :   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
   13620              :            && !INTEGRAL_TYPE_P (scalar_type))
   13621      8656450 :     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
   13622              : 
   13623              :   /* We can't build a vector type of elements with alignment bigger than
   13624              :      their size.  */
   13625     20017544 :   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
   13626       380420 :     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
   13627       190210 :                                                   TYPE_UNSIGNED (scalar_type));
   13628              : 
   13629              :   /* If we fell back to using the mode, fail if there was
   13630              :      no scalar type for it.  */
   13631     30361306 :   if (scalar_type == NULL_TREE)
   13632              :     return NULL_TREE;
   13633              : 
   13634              :   /* If no prevailing mode was supplied, use the mode the target prefers.
   13635              :      Otherwise lookup a vector mode based on the prevailing mode.  */
   13636     30361306 :   if (prevailing_mode == VOIDmode)
   13637              :     {
   13638      1121937 :       gcc_assert (known_eq (nunits, 0U));
   13639      1121937 :       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
   13640      1121937 :       if (SCALAR_INT_MODE_P (simd_mode))
   13641              :         {
   13642              :           /* Traditional behavior is not to take the integer mode
   13643              :              literally, but simply to use it as a way of determining
   13644              :              the vector size.  It is up to mode_for_vector to decide
   13645              :              what the TYPE_MODE should be.
   13646              : 
   13647              :              Note that nunits == 1 is allowed in order to support single
   13648              :              element vector types.  */
   13649        58340 :           if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
   13650          550 :               || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
   13651        28620 :             return NULL_TREE;
   13652              :         }
   13653              :     }
   13654     29239369 :   else if (SCALAR_INT_MODE_P (prevailing_mode)
   13655     29239369 :            || !related_vector_mode (prevailing_mode,
   13656     27220739 :                                     inner_mode, nunits).exists (&simd_mode))
   13657              :     {
   13658              :       /* Fall back to using mode_for_vector, mostly in the hope of being
   13659              :          able to use an integer mode.  */
   13660      2018630 :       if (known_eq (nunits, 0U)
   13661      4716952 :           && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
   13662              :         return NULL_TREE;
   13663              : 
   13664       150886 :       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
   13665       140885 :         return NULL_TREE;
   13666              :     }
   13667              : 
   13668     28324057 :   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
   13669              : 
   13670              :   /* In cases where the mode was chosen by mode_for_vector, check that
   13671              :      the target actually supports the chosen mode, or that it at least
   13672              :      allows the vector mode to be replaced by a like-sized integer.  */
   13673     56648114 :   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
   13674     28334315 :       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
   13675              :     return NULL_TREE;
   13676              : 
   13677              :   /* Re-attach the address-space qualifier if we canonicalized the scalar
   13678              :      type.  */
   13679     28315978 :   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
   13680            5 :     return build_qualified_type
   13681            5 :              (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
   13682              : 
   13683              :   return vectype;
   13684              : }
   13685              : 
   13686              : /* Function get_vectype_for_scalar_type.
   13687              : 
   13688              :    Returns the vector type corresponding to SCALAR_TYPE as supported
   13689              :    by the target, or NULL_TREE if there is none.  If GROUP_SIZE is nonzero
   13690              :    and we're performing BB vectorization, make sure that the number of
   13691              :    elements in the vector is no bigger than GROUP_SIZE.  */
   13692              : 
   13693              : tree
   13694     26394302 : get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
   13695              :                              unsigned int group_size)
   13696              : {
   13697              :   /* For BB vectorization, we should always have a group size once we've
   13698              :      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   13699              :      are tentative requests during things like early data reference
   13700              :      analysis and pattern recognition.  */
   13701     26394302 :   if (is_a <bb_vec_info> (vinfo))
   13702     23498109 :     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
   13703              :   else
   13704              :     group_size = 0;
   13705              : 
   13706     26394302 :   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
   13707              :                                                       scalar_type);
   13708     26394302 :   if (vectype && vinfo->vector_mode == VOIDmode)
   13709      1046687 :     vinfo->vector_mode = TYPE_MODE (vectype);
   13710              : 
   13711              :   /* Register the natural choice of vector type, before the group size
   13712              :      has been applied.  */
   13713            0 :   if (vectype)
   13714     24003857 :     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
   13715              : 
   13716              :   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
   13717              :      try again with an explicit number of elements.  */
   13718     24003857 :   if (vectype
   13719     24003857 :       && group_size
   13720     26394302 :       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
   13721              :     {
   13722              :       /* Start with the biggest number of units that fits within
   13723              :          GROUP_SIZE and halve it until we find a valid vector type.
   13724              :          Usually either the first attempt will succeed or all will
   13725              :          fail (in the latter case because GROUP_SIZE is too small
   13726              :          for the target), but it's possible that a target could have
   13727              :          a hole between supported vector types.
   13728              : 
   13729              :          If GROUP_SIZE is not a power of 2, this has the effect of
   13730              :          trying the largest power of 2 that fits within the group,
   13731              :          even though the group is not a multiple of that vector size.
   13732              :          The BB vectorizer will then try to carve up the group into
   13733              :          smaller pieces.  */
   13734      3043572 :       unsigned int nunits = 1 << floor_log2 (group_size);
   13735      3043572 :       do
   13736              :         {
   13737      3043572 :           vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
   13738      3043572 :                                                          scalar_type, nunits);
   13739      3043572 :           nunits /= 2;
   13740              :         }
   13741      3043572 :       while (nunits > 1 && !vectype);
   13742              :     }
   13743              : 
   13744     26394302 :   return vectype;
   13745              : }
   13746              : 
   13747              : /* Return the vector type corresponding to SCALAR_TYPE as supported
   13748              :    by the target.  NODE, if nonnull, is the SLP tree node that will
   13749              :    use the returned vector type; its lane count is used as the group size.  */
   13750              : 
   13751              : tree
   13752       162378 : get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
   13753              : {
   13754       162378 :   unsigned int group_size = 0;
   13755       162378 :   if (node)
   13756       162378 :     group_size = SLP_TREE_LANES (node);
   13757       162378 :   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   13758              : }
   13759              : 
   13760              : /* Function get_mask_type_for_scalar_type.
   13761              : 
   13762              :    Returns the mask type corresponding to a result of comparison
   13763              :    of vectors of specified SCALAR_TYPE as supported by target, or NULL
   13764              :    if SCALAR_TYPE has no vector type.  If GROUP_SIZE is nonzero and we're
   13765              :    performing BB vectorization, make sure that the number of elements in
   13766              :    the vector is no bigger than GROUP_SIZE.  */
   13767              : 
   13768              : tree
   13769      1101682 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
   13770              :                                unsigned int group_size)
   13771              : {
   13772      1101682 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   13773              : 
   13774      1101682 :   if (!vectype)
   13775              :     return NULL;
   13776              : 
   13777      1082282 :   return truth_type_for (vectype);
   13778              : }
   13779              : 
   13780              : /* Function get_mask_type_for_scalar_type.
   13781              : 
   13782              :    Returns the mask type corresponding to a result of comparison
   13783              :    of vectors of specified SCALAR_TYPE as supported by target, or NULL
   13784              :    if SCALAR_TYPE has no vector type.  NODE, if nonnull, is the SLP tree
   13785              :    node that will use the returned vector type.  */
   13786              : 
   13787              : tree
   13788           19 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
   13789              :                                slp_tree node)
   13790              : {
   13791           19 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
   13792              : 
   13793           19 :   if (!vectype)
   13794              :     return NULL;
   13795              : 
   13796           19 :   return truth_type_for (vectype);
   13797              : }
   13798              : 
   13799              : /* Function get_same_sized_vectype
   13800              : 
   13801              :    Returns a vector type corresponding to SCALAR_TYPE with the same size as
   13802              :    VECTOR_TYPE if supported by the target, NULL_TREE otherwise.  */
   13803              : 
   13804              : tree
   13805       157942 : get_same_sized_vectype (tree scalar_type, tree vector_type)
   13806              : {
   13807       157942 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
   13808            0 :     return truth_type_for (vector_type);
   13809              : 
   13810       157942 :   poly_uint64 nunits;
   13811       315884 :   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
   13812       315884 :                    GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
   13813              :     return NULL_TREE;
   13814              : 
   13815       157942 :   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
   13816       157942 :                                               scalar_type, nunits);
   13817              : }
   13818              : 
   13819              : /* Return true if replacing VINFO->vector_mode with VECTOR_MODE
   13820              :    would not change the chosen vector modes.  */
   13821              : 
   13822              : bool
   13823      1577802 : vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
   13824              : {
   13825      1577802 :   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
   13826      3598150 :        i != vinfo->used_vector_modes.end (); ++i)
   13827      1848220 :     if (!VECTOR_MODE_P (*i)
   13828      5544660 :         || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
   13829       838046 :       return false;
   13830       739756 :   return true;
   13831              : }
   13832              : 
   13833              : /* Return true if replacing VECTOR_MODE with ALT_VECTOR_MODE would not change
   13834              :    the chosen vector modes for analysis of a loop; both must be vector modes.  */
   13835              : 
   13836              : bool
   13837       383842 : vect_chooses_same_modes_p (machine_mode vector_mode,
   13838              :                            machine_mode alt_vector_mode)
   13839              : {
   13840        63380 :   return (VECTOR_MODE_P (vector_mode)
   13841       383842 :           && VECTOR_MODE_P (alt_vector_mode)
   13842       767684 :           && (related_vector_mode (vector_mode,
   13843              :                                    GET_MODE_INNER (alt_vector_mode))
   13844       383842 :               == alt_vector_mode)
   13845       410172 :           && (related_vector_mode (alt_vector_mode,
   13846              :                                    GET_MODE_INNER (vector_mode))
   13847        13165 :               == vector_mode));
   13848              : }
   13849              : 
   13850              : /* Function vect_is_simple_use.
   13851              : 
   13852              :    Input:
   13853              :    VINFO - the vect info of the loop or basic block that is being vectorized.
   13854              :    OPERAND - operand in the loop or bb.
   13855              :    Output:
   13856              :    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
   13857              :      case OPERAND is an SSA_NAME that is defined in the vectorizable region
   13858              :    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
   13859              :      the definition could be anywhere in the function
   13860              :    DT - the type of definition
   13861              : 
   13862              :    Returns whether a stmt with OPERAND can be vectorized, which is the case
   13863              :    unless *DT ends up as vect_unknown_def_type.  Constants are classified as
   13864              :    vect_constant_def.  Other gimple invariants, default definitions and SSA
   13865              :    names for which VINFO->lookup_def finds nothing are classified as
   13866              :    vect_external_def.  Operands that are neither invariant nor SSA names are
   13867              :    vect_unknown_def_type.  Any other SSA name gets the definition type
   13868              :    recorded on the result of vect_stmt_to_vectorize for its stmt_vec_info.  */
   13869              : 
   13870              : bool
   13871     41810148 : vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
   13872              :                     stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
   13873              : {
   13874     41810148 :   if (def_stmt_info_out)
   13875     39610715 :     *def_stmt_info_out = NULL;
   13876     41810148 :   if (def_stmt_out)
   13877      9790853 :     *def_stmt_out = NULL;
   13878     41810148 :   *dt = vect_unknown_def_type;
   13879              : 
   13880     41810148 :   if (dump_enabled_p ())
   13881              :     {
   13882       767862 :       dump_printf_loc (MSG_NOTE, vect_location,
   13883              :                        "vect_is_simple_use: operand ");
   13884       767862 :       if (TREE_CODE (operand) == SSA_NAME
   13885       767862 :           && !SSA_NAME_IS_DEFAULT_DEF (operand))
   13886       704772 :         dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
   13887              :       else
   13888        63090 :         dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
   13889              :     }
   13890              : 
   13891     41810148 :   if (CONSTANT_CLASS_P (operand))
   13892      2799388 :     *dt = vect_constant_def;
   13893     39010760 :   else if (is_gimple_min_invariant (operand))
   13894       333419 :     *dt = vect_external_def;
   13895     38677341 :   else if (TREE_CODE (operand) != SSA_NAME)
   13896          976 :     *dt = vect_unknown_def_type;
   13897     38676365 :   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
   13898       505774 :     *dt = vect_external_def;
   13899              :   else
   13900              :     {
   13901     38170591 :       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
   13902     38170591 :       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
   13903     38170591 :       if (!stmt_vinfo)
   13904       838187 :         *dt = vect_external_def;
   13905              :       else
   13906              :         {
   13907     37332404 :           stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
   13908     37332404 :           def_stmt = stmt_vinfo->stmt;
   13909     37332404 :           *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
   13910     37332404 :           if (def_stmt_info_out)
   13911     35142017 :             *def_stmt_info_out = stmt_vinfo;
   13912              :         }
   13913     38170591 :       if (def_stmt_out)
   13914      9583578 :         *def_stmt_out = def_stmt;
   13915              :     }
   13916              : 
   13917     41810148 :   if (dump_enabled_p ())
   13918              :     {
   13919       767862 :       dump_printf (MSG_NOTE, ", type of def: ");
   13920       767862 :       switch (*dt)
   13921              :         {
   13922            0 :         case vect_uninitialized_def:
   13923            0 :           dump_printf (MSG_NOTE, "uninitialized\n");
   13924            0 :           break;
   13925        52362 :         case vect_constant_def:
   13926        52362 :           dump_printf (MSG_NOTE, "constant\n");
   13927        52362 :           break;
   13928        26314 :         case vect_external_def:
   13929        26314 :           dump_printf (MSG_NOTE, "external\n");
   13930        26314 :           break;
   13931       549381 :         case vect_internal_def:
   13932       549381 :           dump_printf (MSG_NOTE, "internal\n");
   13933       549381 :           break;
   13934       108263 :         case vect_induction_def:
   13935       108263 :           dump_printf (MSG_NOTE, "induction\n");
   13936       108263 :           break;
   13937        28177 :         case vect_reduction_def:
   13938        28177 :           dump_printf (MSG_NOTE, "reduction\n");
   13939        28177 :           break;
   13940          482 :         case vect_double_reduction_def:
   13941          482 :           dump_printf (MSG_NOTE, "double reduction\n");
   13942          482 :           break;
   13943         2173 :         case vect_nested_cycle:
   13944         2173 :           dump_printf (MSG_NOTE, "nested cycle\n");
   13945         2173 :           break;
   13946          276 :         case vect_first_order_recurrence:
   13947          276 :           dump_printf (MSG_NOTE, "first order recurrence\n");
   13948          276 :           break;
   13949            0 :         case vect_condition_def:
   13950            0 :           dump_printf (MSG_NOTE, "control flow\n");
   13951            0 :           break;
   13952          434 :         case vect_unknown_def_type:
   13953          434 :           dump_printf (MSG_NOTE, "unknown\n");
   13954          434 :           break;
   13955              :         }
   13956              :     }
   13957              : 
   13958     41810148 :   if (*dt == vect_unknown_def_type)
   13959              :     {
   13960        57297 :       if (dump_enabled_p ())
   13961          434 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   13962              :                          "Unsupported pattern.\n");
   13963        57297 :       return false;
   13964              :     }
   13965              : 
   13966              :   return true;
   13967              : }
   13968              : 
   13969              : /* Function vect_is_simple_use.
   13970              : 
   13971              :    Same as vect_is_simple_use but determines the operand by operand
   13972              :    position OPERAND among the children of SLP_NODE, filling in *OP,
   13973              :    *SLP_DEF and *VECTYPE from that child.  */
   13974              : 
   13975              : bool
   13976      3844176 : vect_is_simple_use (vec_info *vinfo, slp_tree slp_node,
   13977              :                     unsigned operand, tree *op, slp_tree *slp_def,
   13978              :                     enum vect_def_type *dt,
   13979              :                     tree *vectype, stmt_vec_info *def_stmt_info_out)
   13980              : {
   13981      3844176 :   slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
   13982      3844176 :   *slp_def = child;
   13983      3844176 :   *vectype = SLP_TREE_VECTYPE (child);
   13984      3844176 :   if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
   13985              :     {
   13986              :       /* ???  VEC_PERM nodes might be intermediate and their lane value
   13987              :          have no representative (nor do we build a VEC_PERM stmt for
   13988              :          the actual operation).  Note for two-operator nodes we set
   13989              :          a representative but leave scalar stmts empty as we'd only
   13990              :          have one for a subset of lanes.  Ideally no caller would
   13991              :          require *op for internal defs.  */
   13992      2132590 :       if (SLP_TREE_REPRESENTATIVE (child))
   13993              :         {
   13994      2131793 :           *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
   13995      2131793 :           return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
   13996              :         }
   13997              :       else
   13998              :         {
   13999          797 :           gcc_assert (SLP_TREE_PERMUTE_P (child));
   14000          797 :           *op = error_mark_node;
   14001          797 :           *dt = vect_internal_def;
   14002          797 :           if (def_stmt_info_out)
   14003            0 :             *def_stmt_info_out = NULL;
   14004          797 :           return true;
   14005              :         }
   14006              :     }
   14007              :   else
   14008              :     {
   14009      1711586 :       if (def_stmt_info_out)
   14010        55502 :         *def_stmt_info_out = NULL;
   14011      1711586 :       *op = SLP_TREE_SCALAR_OPS (child)[0];
   14012      1711586 :       *dt = SLP_TREE_DEF_TYPE (child);
   14013      1711586 :       return true;
   14014              :     }
   14015              : }
   14016              : 
   14017              : /* If OP is not NULL and is external or constant update its vector
   14018              :    type with VECTYPE.  Returns true if successful or false if not,
   14019              :    for example when conflicting vector types are present.  */
   14020              : 
   14021              : bool
   14022      3495782 : vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
   14023              : {
   14024      3495782 :   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
   14025              :     return true;
   14026      1150283 :   if (SLP_TREE_VECTYPE (op))
   14027       103392 :     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
   14028              :   /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
   14029              :      should be handled by patterns.  Allow vect_constant_def for now
   14030              :      as well as the trivial single-lane uniform vect_external_def case
   14031              :      both of which we code-generate reasonably.  */
   14032      1046891 :   if (VECTOR_BOOLEAN_TYPE_P (vectype)
   14033         1534 :       && SLP_TREE_DEF_TYPE (op) == vect_external_def
   14034      1048037 :       && SLP_TREE_LANES (op) > 1)
   14035              :     return false;
   14036      1046732 :   SLP_TREE_VECTYPE (op) = vectype;
   14037      1046732 :   return true;
   14038              : }
   14039              : 
   14040              : /* Function supportable_widening_operation
   14041              : 
   14042              :    Check whether an operation represented by the code CODE is a
   14043              :    widening operation that is supported by the target platform in
   14044              :    vector form (i.e., when operating on arguments of type VECTYPE_IN
   14045              :    producing a result of type VECTYPE_OUT).
   14046              : 
   14047              :    Accepted CODEs are NOP (CONVERT), FLOAT, FIX_TRUNC, WIDEN_MULT,
   14048              :    WIDEN_LSHIFT, DOT_PROD, SAD and widening internal functions.  They are
   14049              :    checked against the target's optabs, either via the corresponding vector
   14050              :    tree-codes or via the lo/hi and even/odd internal-function variants.
   14051              : 
   14052              :    When EVENODD_OK, the even/odd (lane-swizzling) form of WIDEN_MULT is tried first.
   14053              : 
   14054              :    Output:
   14055              :    - CODE1 and CODE2 are codes of vector operations to be used when
   14056              :    vectorizing the operation, if available.
   14057              :    - MULTI_STEP_CVT determines the number of required intermediate steps in
   14058              :    case of multi-step conversion (like char->short->int - in that case
   14059              :    MULTI_STEP_CVT will be 1).
   14060              :    - INTERM_TYPES receives the intermediate types of a multi-step conversion
   14061              :    (short in the above example); it is released again if that search fails.  */
   14062              : 
   14063              : bool
   14064       485688 : supportable_widening_operation (code_helper code,
   14065              :                                 tree vectype_out, tree vectype_in,
   14066              :                                 bool evenodd_ok,
   14067              :                                 code_helper *code1,
   14068              :                                 code_helper *code2,
   14069              :                                 int *multi_step_cvt,
   14070              :                                 vec<tree> *interm_types)
   14071              : {
   14072       485688 :   machine_mode vec_mode;
   14073       485688 :   enum insn_code icode1, icode2;
   14074       485688 :   optab optab1 = unknown_optab, optab2 = unknown_optab;
   14075       485688 :   tree vectype = vectype_in;
   14076       485688 :   tree wide_vectype = vectype_out;
   14077       485688 :   tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
   14078       485688 :   int i;
   14079       485688 :   tree prev_type, intermediate_type;
   14080       485688 :   machine_mode intermediate_mode, prev_mode;
   14081       485688 :   optab optab3, optab4;
   14082              : 
   14083       485688 :   *multi_step_cvt = 0;
   14084              : 
   14085       485688 :   switch (code.safe_as_tree_code ())
   14086              :     {
   14087              :     case MAX_TREE_CODES:
   14088              :       /* Don't set c1 and c2 if code is not a tree_code.  */
   14089              :       break;
   14090              : 
   14091       186703 :     case WIDEN_MULT_EXPR:
   14092              :       /* The result of a vectorized widening operation usually requires
   14093              :          two vectors (because the widened results do not fit into one vector).
   14094              :          The generated vector results would normally be expected to be
   14095              :          generated in the same order as in the original scalar computation,
   14096              :          i.e. if 8 results are generated in each vector iteration, they are
   14097              :          to be organized as follows:
   14098              :                 vect1: [res1,res2,res3,res4],
   14099              :                 vect2: [res5,res6,res7,res8].
   14100              : 
   14101              :          However, in the special case that the result of the widening
   14102              :          operation is used in a reduction computation only, the order doesn't
   14103              :          matter (because when vectorizing a reduction we change the order of
   14104              :          the computation).  Some targets can take advantage of this and
   14105              :          generate more efficient code.  For example, targets like Altivec,
   14106              :          that support widen_mult using a sequence of {mult_even,mult_odd}
   14107              :          generate the following vectors:
   14108              :                 vect1: [res1,res3,res5,res7],
   14109              :                 vect2: [res2,res4,res6,res8].
   14110              : 
   14111              :          When vectorizing outer-loops, we execute the inner-loop sequentially
   14112              :          (each vectorized inner-loop iteration contributes to VF outer-loop
   14113              :          iterations in parallel).  We therefore don't allow to change the
   14114              :          order of the computation in the inner-loop during outer-loop
   14115              :          vectorization.  */
   14116              :       /* TODO: Another case in which order doesn't *really* matter is when we
   14117              :          widen and then contract again, e.g. (short)((int)x * y >> 8).
   14118              :          Normally, pack_trunc performs an even/odd permute, whereas the
   14119              :          repack from an even/odd expansion would be an interleave, which
   14120              :          would be significantly simpler for e.g. AVX2.  */
   14121              :       /* In any case, in order to avoid duplicating the code below, recurse
   14122              :          on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
   14123              :          are properly set up for the caller.  If we fail, we'll continue with
   14124              :          a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
   14125       186703 :       if (evenodd_ok
   14126       186703 :           && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
   14127              :                                              vectype_out, vectype_in,
   14128              :                                              evenodd_ok, code1,
   14129              :                                              code2, multi_step_cvt,
   14130              :                                              interm_types))
   14131        97993 :         return true;
   14132              :       c1 = VEC_WIDEN_MULT_LO_EXPR;
   14133              :       c2 = VEC_WIDEN_MULT_HI_EXPR;
   14134              :       break;
   14135              : 
   14136              :     case DOT_PROD_EXPR:
   14137       387695 :       c1 = DOT_PROD_EXPR;
   14138       387695 :       c2 = DOT_PROD_EXPR;
   14139              :       break;
   14140              : 
   14141            0 :     case SAD_EXPR:
   14142            0 :       c1 = SAD_EXPR;
   14143            0 :       c2 = SAD_EXPR;
   14144            0 :       break;
   14145              : 
   14146       184759 :     case VEC_WIDEN_MULT_EVEN_EXPR:
   14147              :       /* Support the recursion induced just above.  */
   14148       184759 :       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
   14149       184759 :       c2 = VEC_WIDEN_MULT_ODD_EXPR;
   14150       184759 :       break;
   14151              : 
   14152         9408 :     case WIDEN_LSHIFT_EXPR:
   14153         9408 :       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
   14154         9408 :       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
   14155         9408 :       break;
   14156              : 
   14157        40985 :     CASE_CONVERT:
   14158        40985 :       c1 = VEC_UNPACK_LO_EXPR;
   14159        40985 :       c2 = VEC_UNPACK_HI_EXPR;
   14160        40985 :       break;
   14161              : 
   14162         9195 :     case FLOAT_EXPR:
   14163         9195 :       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
   14164         9195 :       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
   14165         9195 :       break;
   14166              : 
   14167          119 :     case FIX_TRUNC_EXPR:
   14168          119 :       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
   14169          119 :       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
   14170          119 :       break;
   14171              : 
   14172            0 :     default:
   14173            0 :       gcc_unreachable ();
   14174              :     }
   14175              : 
   14176       387695 :   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
   14177              :     std::swap (c1, c2);
   14178              : 
   14179       387695 :   if (code == FIX_TRUNC_EXPR)
   14180              :     {
   14181              :       /* The signedness is determined from output operand.  */
   14182          119 :       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
   14183          119 :       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
   14184              :     }
   14185       688034 :   else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
   14186        40985 :            && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
   14187         8016 :            && VECTOR_BOOLEAN_TYPE_P (vectype)
   14188         8016 :            && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
   14189       333521 :            && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
   14190              :     {
   14191              :       /* If the input and result modes are the same, a different optab
   14192              :          is needed where we pass in the number of units in vectype.  */
   14193              :       optab1 = vec_unpacks_sbool_lo_optab;
   14194              :       optab2 = vec_unpacks_sbool_hi_optab;
   14195              :     }
   14196              : 
   14197       387695 :   vec_mode = TYPE_MODE (vectype);
   14198       387695 :   if (widening_fn_p (code))
   14199              :      {
   14200              :        /* If this is an internal fn then we must check whether the target
   14201              :           supports either a low-high split or an even-odd split.  */
   14202        54519 :       internal_fn ifn = as_internal_fn ((combined_fn) code);
   14203              : 
   14204        54519 :       internal_fn lo, hi, even, odd;
   14205        54519 :       lookup_hilo_internal_fn (ifn, &lo, &hi);
   14206        54519 :       if (BYTES_BIG_ENDIAN)
   14207              :         std::swap (lo, hi);
   14208        54519 :       *code1 = as_combined_fn (lo);
   14209        54519 :       *code2 = as_combined_fn (hi);
   14210        54519 :       optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
   14211        54519 :       optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
   14212              : 
   14213              :       /* If we don't support low-high, then check for even-odd.  */
   14214        54519 :       if (!optab1
   14215        54519 :           || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
   14216            0 :           || !optab2
   14217        54519 :           || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
   14218              :         {
   14219        54519 :           lookup_evenodd_internal_fn (ifn, &even, &odd);
   14220        54519 :           *code1 = as_combined_fn (even);
   14221        54519 :           *code2 = as_combined_fn (odd);
   14222        54519 :           optab1 = direct_internal_fn_optab (even, {vectype, vectype});
   14223        54519 :           optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
   14224              :         }
   14225              :     }
   14226       333176 :   else if (code.is_tree_code ())
   14227              :     {
   14228       333176 :       if (code == FIX_TRUNC_EXPR)
   14229              :         {
   14230              :           /* The signedness is determined from output operand.  */
   14231          119 :           optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
   14232          119 :           optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
   14233              :         }
   14234       333057 :       else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
   14235        40985 :                && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
   14236         8016 :                && VECTOR_BOOLEAN_TYPE_P (vectype)
   14237         8016 :                && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
   14238       333521 :                && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
   14239              :         {
   14240              :           /* If the input and result modes are the same, a different optab
   14241              :              is needed where we pass in the number of units in vectype.  */
   14242              :           optab1 = vec_unpacks_sbool_lo_optab;
   14243              :           optab2 = vec_unpacks_sbool_hi_optab;
   14244              :         }
   14245              :       else
   14246              :         {
   14247       332593 :           optab1 = optab_for_tree_code (c1, vectype, optab_default);
   14248       332593 :           optab2 = optab_for_tree_code (c2, vectype, optab_default);
   14249              :         }
   14250       333176 :       *code1 = c1;
   14251       333176 :       *code2 = c2;
   14252              :     }
   14253              : 
   14254       387695 :   if (!optab1 || !optab2)
   14255              :     return false;
   14256              : 
   14257       387695 :   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
   14258       387695 :        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
   14259       230587 :     return false;
   14260              : 
   14261              : 
   14262       157108 :   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
   14263       157108 :       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
   14264              :     {
   14265       145633 :       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   14266              :         return true;
   14267              :       /* For scalar masks we may have different boolean
   14268              :          vector types having the same QImode.  Thus we
   14269              :          add additional check for elements number.  */
   14270         4229 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
   14271              :                     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
   14272              :         return true;
   14273              :     }
   14274              : 
   14275              :   /* Check if it's a multi-step conversion that can be done using intermediate
   14276              :      types.  */
   14277              : 
   14278        11680 :   prev_type = vectype;
   14279        11680 :   prev_mode = vec_mode;
   14280              : 
   14281       242552 :   if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
   14282              :     return false;
   14283              : 
   14284              :   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
   14285              :      intermediate steps in promotion sequence.  We try
   14286              :      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
   14287              :      not.  */
   14288        11628 :   interm_types->create (MAX_INTERM_CVT_STEPS);
   14289        13018 :   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
   14290              :     {
   14291        13018 :       intermediate_mode = insn_data[icode1].operand[0].mode;
   14292        13018 :       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
   14293         4795 :         intermediate_type
   14294         4795 :           = vect_halve_mask_nunits (prev_type, intermediate_mode);
   14295         8223 :       else if (VECTOR_MODE_P (intermediate_mode))
   14296              :         {
   14297         8223 :           tree intermediate_element_type
   14298         8223 :             = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
   14299         8223 :                                               TYPE_UNSIGNED (prev_type));
   14300         8223 :           intermediate_type
   14301         8223 :             = build_vector_type_for_mode (intermediate_element_type,
   14302              :                                           intermediate_mode);
   14303         8223 :         }
   14304              :       else
   14305            0 :         intermediate_type
   14306            0 :           = lang_hooks.types.type_for_mode (intermediate_mode,
   14307            0 :                                             TYPE_UNSIGNED (prev_type));
   14308              : 
   14309        13018 :       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
   14310         4795 :           && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
   14311         4795 :           && intermediate_mode == TYPE_MODE (wide_vectype)
   14312        13287 :           && SCALAR_INT_MODE_P (intermediate_mode))
   14313              :         {
   14314              :           /* If the input and result modes are the same, a different optab
   14315              :              is needed where we pass in the number of units in vectype.  */
   14316              :           optab3 = vec_unpacks_sbool_lo_optab;
   14317              :           optab4 = vec_unpacks_sbool_hi_optab;
   14318              :         }
   14319              :       else
   14320              :         {
   14321        12749 :           optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
   14322        12749 :           optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
   14323              :         }
   14324              : 
   14325        13018 :       if (!optab3 || !optab4
   14326        13018 :           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
   14327        12986 :           || insn_data[icode1].operand[0].mode != intermediate_mode
   14328        12986 :           || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
   14329        12986 :           || insn_data[icode2].operand[0].mode != intermediate_mode
   14330        12986 :           || ((icode1 = optab_handler (optab3, intermediate_mode))
   14331              :               == CODE_FOR_nothing)
   14332        25751 :           || ((icode2 = optab_handler (optab4, intermediate_mode))
   14333              :               == CODE_FOR_nothing))
   14334              :         break;
   14335              : 
   14336        12733 :       interm_types->quick_push (intermediate_type);
   14337        12733 :       (*multi_step_cvt)++;
   14338              : 
   14339        12733 :       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
   14340        12733 :           && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
   14341              :         {
   14342        11407 :           if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   14343              :             return true;
   14344         3785 :           if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
   14345              :                         TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
   14346              :             return true;
   14347              :         }
   14348              : 
   14349         1390 :       prev_type = intermediate_type;
   14350         1390 :       prev_mode = intermediate_mode;
   14351              :     }
   14352              : 
   14353          285 :   interm_types->release ();
   14354          285 :   return false;
   14355              : }
   14356              : 
   14357              : 
   14358              : /* Function supportable_narrowing_operation
   14359              : 
   14360              :    Check whether an operation represented by the code CODE is a
   14361              :    narrowing operation that is supported by the target platform in
   14362              :    vector form (i.e., when operating on arguments of type VECTYPE_IN
   14363              :    and producing a result of type VECTYPE_OUT).
   14364              : 
   14365              :    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   14366              :    and FLOAT; a CODE that is not a tree code is rejected.  These are checked
   14367              :    against the target's optabs for the corresponding vector tree-codes.
   14368              : 
   14369              :    Output:
   14370              :    - CODE1 is the code of a vector operation to be used when
   14371              :    vectorizing the operation, if available.
   14372              :    - MULTI_STEP_CVT determines the number of required intermediate steps in
   14373              :    case of multi-step conversion (like int->short->char - in that case
   14374              :    MULTI_STEP_CVT will be 1).
   14375              :    - INTERM_TYPES receives the intermediate types of a multi-step conversion
   14376              :    (short in the above example); it is released again if that search fails.  */
   14377              : 
   14378              : bool
   14379        42095 : supportable_narrowing_operation (code_helper code,
   14380              :                                  tree vectype_out, tree vectype_in,
   14381              :                                  code_helper *code1, int *multi_step_cvt,
   14382              :                                  vec<tree> *interm_types)
   14383              : {
   14384        42095 :   machine_mode vec_mode;
   14385        42095 :   enum insn_code icode1;
   14386        42095 :   optab optab1, interm_optab;
   14387        42095 :   tree vectype = vectype_in;
   14388        42095 :   tree narrow_vectype = vectype_out;
   14389        42095 :   enum tree_code c1;
   14390        42095 :   tree intermediate_type, prev_type;
   14391        42095 :   machine_mode intermediate_mode, prev_mode;
   14392        42095 :   int i;
   14393        42095 :   unsigned HOST_WIDE_INT n_elts;
   14394        42095 :   bool uns;
   14395              : 
   14396        42095 :   if (!code.is_tree_code ())
   14397              :     return false;
   14398              : 
   14399        42095 :   *multi_step_cvt = 0;
   14400        42095 :   switch ((tree_code) code)
   14401              :     {
   14402        41251 :     CASE_CONVERT:
   14403        41251 :       c1 = VEC_PACK_TRUNC_EXPR;
   14404        41251 :       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
   14405        11684 :           && VECTOR_BOOLEAN_TYPE_P (vectype)
   14406        11684 :           && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
   14407         5262 :           && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
   14408        46513 :           && n_elts < BITS_PER_UNIT)
   14409              :         optab1 = vec_pack_sbool_trunc_optab;
   14410              :       else
   14411        38766 :         optab1 = optab_for_tree_code (c1, vectype, optab_default);
   14412              :       break;
   14413              : 
   14414          561 :     case FIX_TRUNC_EXPR:
   14415          561 :       c1 = VEC_PACK_FIX_TRUNC_EXPR;
   14416              :       /* The signedness is determined from output operand.  */
   14417          561 :       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
   14418          561 :       break;
   14419              : 
   14420          283 :     case FLOAT_EXPR:
   14421          283 :       c1 = VEC_PACK_FLOAT_EXPR;
   14422          283 :       optab1 = optab_for_tree_code (c1, vectype, optab_default);
   14423          283 :       break;
   14424              : 
   14425            0 :     default:
   14426            0 :       gcc_unreachable ();
   14427              :     }
   14428              : 
   14429        42095 :   if (!optab1)
   14430              :     return false;
   14431              : 
   14432        42095 :   vec_mode = TYPE_MODE (vectype);
   14433        42095 :   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
   14434              :     return false;
   14435              : 
   14436        37834 :   *code1 = c1;
   14437              : 
   14438        37834 :   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
   14439              :     {
   14440        23588 :       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   14441              :         return true;
   14442              :       /* For scalar masks we may have different boolean
   14443              :          vector types having the same QImode.  Thus we
   14444              :          add additional check for elements number.  */
   14445         5821 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
   14446              :                     TYPE_VECTOR_SUBPARTS (narrow_vectype)))
   14447              :         return true;
   14448              :     }
   14449              : 
   14450        14397 :   if (code == FLOAT_EXPR)
   14451              :     return false;
   14452              : 
   14453              :   /* Check if it's a multi-step conversion that can be done using intermediate
   14454              :      types.  */
   14455        14397 :   prev_mode = vec_mode;
   14456        14397 :   prev_type = vectype;
   14457        14397 :   if (code == FIX_TRUNC_EXPR)
   14458           94 :     uns = TYPE_UNSIGNED (vectype_out);
   14459              :   else
   14460        14303 :     uns = TYPE_UNSIGNED (vectype);
   14461              : 
   14462              :   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
   14463              :      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
   14464              :      costly than signed.  */
   14465        14397 :   if (code == FIX_TRUNC_EXPR && uns)
   14466              :     {
   14467           28 :       enum insn_code icode2;
   14468              : 
   14469           28 :       intermediate_type
   14470           28 :         = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
   14471           28 :       interm_optab
   14472           28 :         = optab_for_tree_code (c1, intermediate_type, optab_default);
   14473           28 :       if (interm_optab != unknown_optab
   14474           28 :           && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
   14475           28 :           && insn_data[icode1].operand[0].mode
   14476           28 :              == insn_data[icode2].operand[0].mode)
   14477              :         {
   14478              :           uns = false;
   14479              :           optab1 = interm_optab;
   14480              :           icode1 = icode2;
   14481              :         }
   14482              :     }
   14483              : 
   14484              :   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
   14485              :      intermediate steps in demotion sequence.  We try
   14486              :      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
   14487        14397 :   interm_types->create (MAX_INTERM_CVT_STEPS);
   14488        30940 :   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
   14489              :     {
   14490        16543 :       intermediate_mode = insn_data[icode1].operand[0].mode;
   14491        16543 :       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
   14492         7208 :         intermediate_type
   14493         7208 :           = vect_double_mask_nunits (prev_type, intermediate_mode);
   14494              :       else
   14495         9335 :         intermediate_type
   14496         9335 :           = lang_hooks.types.type_for_mode (intermediate_mode, uns);
   14497        16543 :       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
   14498         7208 :           && VECTOR_BOOLEAN_TYPE_P (prev_type)
   14499         7208 :           && SCALAR_INT_MODE_P (prev_mode)
   14500         3134 :           && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
   14501        19677 :           && n_elts < BITS_PER_UNIT)
   14502              :         interm_optab = vec_pack_sbool_trunc_optab;
   14503              :       else
   14504        16189 :         interm_optab
   14505        16189 :           = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
   14506              :                                  optab_default);
   14507          354 :       if (!interm_optab
   14508        16543 :           || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
   14509        16543 :           || insn_data[icode1].operand[0].mode != intermediate_mode
   14510        32732 :           || ((icode1 = optab_handler (interm_optab, intermediate_mode))
   14511              :               == CODE_FOR_nothing))
   14512              :         break;
   14513              : 
   14514        15628 :       interm_types->quick_push (intermediate_type);
   14515        15628 :       (*multi_step_cvt)++;
   14516              : 
   14517        15628 :       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
   14518              :         {
   14519        13482 :           if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   14520              :             return true;
   14521         5030 :           if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
   14522              :                         TYPE_VECTOR_SUBPARTS (narrow_vectype)))
   14523              :             return true;
   14524              :         }
   14525              : 
   14526         2146 :       prev_mode = intermediate_mode;
   14527         2146 :       prev_type = intermediate_type;
   14528         2146 :       optab1 = interm_optab;
   14529              :     }
   14530              : 
   14531          915 :   interm_types->release ();
   14532          915 :   return false;
   14533              : }
   14534              : 
   14535              : /* Function supportable_indirect_convert_operation
   14536              : 
   14537              :    Check whether an operation represented by the code CODE can be done by a
   14538              :    single or multiple operations that are supported by the target platform in
   14539              :    vector form (i.e., when operating on arguments of type VECTYPE_IN
   14540              :    producing a result of type VECTYPE_OUT).
   14541              : 
   14542              :    Convert operations we currently support directly are FIX_TRUNC and FLOAT.
   14543              :    This function checks if these operations are supported
   14544              :    by the target platform directly (via vector tree-codes).
   14545              : 
   14546              :    Output:
   14547              :    - converts contains some pairs to perform the convert operation,
   14548              :    the pair's first is the intermediate type, and its second is the code of
   14549              :    a vector operation to be used when converting the operation from the
   14550              :    previous type to the intermediate type. */
   14551              : bool
   14552        85839 : supportable_indirect_convert_operation (code_helper code,
   14553              :                                         tree vectype_out,
   14554              :                                         tree vectype_in,
   14555              :                                         vec<std::pair<tree, tree_code> > &converts,
   14556              :                                         tree op0, slp_tree slp_op0)
   14557              : {
   14558        85839 :   bool found_mode = false;
   14559        85839 :   scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
   14560        85839 :   scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
   14561        85839 :   tree_code tc1, tc2, code1, code2;
   14562              : 
   14563        85839 :   tree cvt_type = NULL_TREE;
   14564        85839 :   poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
   14565              : 
   14566        85839 :   if (supportable_convert_operation ((tree_code) code,
   14567              :                                      vectype_out,
   14568              :                                      vectype_in,
   14569              :                                      &tc1))
   14570              :     {
   14571        19182 :       converts.safe_push (std::make_pair (vectype_out, tc1));
   14572        19182 :       return true;
   14573              :     }
   14574              : 
   14575              :   /* For conversions between float and integer types try whether
   14576              :      we can use intermediate signed integer types to support the
   14577              :      conversion.  */
   14578       133314 :   if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
   14579        66657 :       && (code == FLOAT_EXPR
   14580         3164 :           || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
   14581              :     {
   14582          472 :       bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
   14583          236 :       bool float_expr_p = code == FLOAT_EXPR;
   14584          236 :       unsigned short target_size;
   14585          236 :       scalar_mode intermediate_mode;
   14586          236 :       if (demotion)
   14587              :         {
   14588           84 :           intermediate_mode = lhs_mode;
   14589           84 :           target_size = GET_MODE_SIZE (rhs_mode);
   14590              :         }
   14591              :       else
   14592              :         {
   14593          152 :           target_size = GET_MODE_SIZE (lhs_mode);
   14594          152 :           if (!int_mode_for_size
   14595          152 :               (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
   14596          132 :             return false;
   14597              :         }
   14598          236 :       code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
   14599              :       code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
   14600          236 :       opt_scalar_mode mode_iter;
   14601          411 :       FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
   14602              :         {
   14603          411 :           intermediate_mode = mode_iter.require ();
   14604              : 
   14605          822 :           if (GET_MODE_SIZE (intermediate_mode) > target_size)
   14606              :             break;
   14607              : 
   14608          345 :           scalar_mode cvt_mode;
   14609          345 :           if (!int_mode_for_size
   14610          345 :               (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
   14611              :             break;
   14612              : 
   14613          315 :           cvt_type = build_nonstandard_integer_type
   14614          315 :             (GET_MODE_BITSIZE (cvt_mode), 0);
   14615              : 
   14616              :           /* Check if the intermediate type can hold OP0's range.
   14617              :              When converting from float to integer this is not necessary
   14618              :              because values that do not fit the (smaller) target type are
   14619              :              unspecified anyway.  */
   14620          315 :           if (demotion && float_expr_p)
   14621              :             {
   14622            8 :               wide_int op_min_value, op_max_value;
   14623              :               /* For vector form, it looks like op0 doesn't have RANGE_INFO.
   14624              :                  In the future, if it is supported, changes may need to be made
   14625              :                  to this part, such as checking the RANGE of each element
   14626              :                  in the vector.  */
   14627            8 :               if (slp_op0)
   14628              :                 {
   14629            4 :                   tree def;
   14630              :                   /* ???  Merge ranges in case of more than one lane.  */
   14631            4 :                   if (SLP_TREE_LANES (slp_op0) != 1
   14632            0 :                       || !(def = vect_get_slp_scalar_def (slp_op0, 0))
   14633            4 :                       || !vect_get_range_info (def,
   14634              :                                                &op_min_value, &op_max_value))
   14635              :                     break;
   14636              :                 }
   14637            4 :               else if (!op0
   14638            0 :                        || TREE_CODE (op0) != SSA_NAME
   14639            0 :                        || !SSA_NAME_RANGE_INFO (op0)
   14640            4 :                        || !vect_get_range_info (op0, &op_min_value,
   14641              :                                                 &op_max_value))
   14642              :                 break;
   14643              : 
   14644            0 :               if (cvt_type == NULL_TREE
   14645            0 :                   || (wi::min_precision (op_max_value, SIGNED)
   14646            0 :                       > TYPE_PRECISION (cvt_type))
   14647            0 :                   || (wi::min_precision (op_min_value, SIGNED)
   14648            0 :                       > TYPE_PRECISION (cvt_type)))
   14649            0 :                 continue;
   14650            8 :             }
   14651              : 
   14652          307 :           cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
   14653              :                                                           cvt_type,
   14654              :                                                           nelts);
   14655              :           /* This should only happen for SLP as long as the loop vectorizer
   14656              :              only supports same-sized vectors.  */
   14657          482 :           if (cvt_type == NULL_TREE
   14658          439 :               || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
   14659          307 :               || !supportable_convert_operation ((tree_code) code1,
   14660              :                                                  vectype_out,
   14661              :                                                  cvt_type, &tc1)
   14662          515 :               || !supportable_convert_operation ((tree_code) code2,
   14663              :                                                  cvt_type,
   14664              :                                                  vectype_in, &tc2))
   14665          175 :             continue;
   14666              : 
   14667              :           found_mode = true;
   14668              :           break;
   14669              :         }
   14670              : 
   14671          236 :       if (found_mode)
   14672              :         {
   14673          132 :           converts.safe_push (std::make_pair (cvt_type, tc2));
   14674          132 :           if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
   14675          132 :             converts.safe_push (std::make_pair (vectype_out, tc1));
   14676          132 :           return true;
   14677              :         }
   14678              :     }
   14679              :   return false;
   14680              : }
   14681              : 
   14682              : /* Generate and return a vector mask of MASK_TYPE such that
   14683              :    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   14684              :    Add the statements to SEQ.  */
   14685              : 
   14686              : tree
   14687            0 : vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
   14688              :                 tree end_index, const char *name)
   14689              : {
   14690            0 :   tree cmp_type = TREE_TYPE (start_index);
   14691            0 :   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
   14692              :                                                        cmp_type, mask_type,
   14693              :                                                        OPTIMIZE_FOR_SPEED));
   14694            0 :   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
   14695              :                                             start_index, end_index,
   14696              :                                             build_zero_cst (mask_type));
   14697            0 :   tree tmp;
   14698            0 :   if (name)
   14699            0 :     tmp = make_temp_ssa_name (mask_type, NULL, name);
   14700              :   else
   14701            0 :     tmp = make_ssa_name (mask_type);
   14702            0 :   gimple_call_set_lhs (call, tmp);
   14703            0 :   gimple_seq_add_stmt (seq, call);
   14704            0 :   return tmp;
   14705              : }
   14706              : 
   14707              : /* Generate a vector mask of type MASK_TYPE for which index I is false iff
   14708              :    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
   14709              : 
   14710              : tree
   14711            0 : vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
   14712              :                     tree end_index)
   14713              : {
   14714            0 :   tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
   14715            0 :   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
   14716              : }
   14717              : 
   14718              : /* Try to compute the vector types required to vectorize STMT_INFO,
   14719              :    returning true on success and false if vectorization isn't possible.
   14720              :    If GROUP_SIZE is nonzero and we're performing BB vectorization,
   14721              :    make sure that the number of elements in the vectors is no bigger
   14722              :    than GROUP_SIZE.
   14723              : 
   14724              :    On success:
   14725              : 
   14726              :    - Set *STMT_VECTYPE_OUT to:
   14727              :      - NULL_TREE if the statement doesn't need to be vectorized;
   14728              :      - the equivalent of STMT_VINFO_VECTYPE otherwise.
   14729              : 
   14730              :    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
   14731              :      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
   14732              :      statement does not help to determine the overall number of units.  */
   14733              : 
   14734              : opt_result
   14735      5766336 : vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
   14736              :                                 tree *stmt_vectype_out,
   14737              :                                 tree *nunits_vectype_out,
   14738              :                                 unsigned int group_size)
   14739              : {
   14740      5766336 :   gimple *stmt = stmt_info->stmt;
   14741              : 
   14742              :   /* For BB vectorization, we should always have a group size once we've
   14743              :      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   14744              :      are tentative requests during things like early data reference
   14745              :      analysis and pattern recognition.  */
   14746      5766336 :   if (is_a <bb_vec_info> (vinfo))
   14747      4518568 :     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
   14748              :   else
   14749              :     group_size = 0;
   14750              : 
   14751      5766336 :   *stmt_vectype_out = NULL_TREE;
   14752      5766336 :   *nunits_vectype_out = NULL_TREE;
   14753              : 
   14754      5766336 :   if (gimple_get_lhs (stmt) == NULL_TREE
   14755              :       /* Allow vector conditionals through here.  */
   14756         2762 :       && !is_a <gcond *> (stmt)
   14757              :       /* MASK_STORE and friends have no lhs, but are ok.  */
   14758      5771840 :       && !(is_gimple_call (stmt)
   14759         2762 :            && gimple_call_internal_p (stmt)
   14760         2742 :            && internal_store_fn_p (gimple_call_internal_fn (stmt))))
   14761              :     {
   14762           20 :       if (is_a <gcall *> (stmt))
   14763              :         {
   14764              :           /* Ignore calls with no lhs.  These must be calls to
   14765              :              #pragma omp simd functions, and what vectorization factor
   14766              :              it really needs can't be determined until
   14767              :              vectorizable_simd_clone_call.  */
   14768           20 :           if (dump_enabled_p ())
   14769           18 :             dump_printf_loc (MSG_NOTE, vect_location,
   14770              :                              "defer to SIMD clone analysis.\n");
   14771           20 :           return opt_result::success ();
   14772              :         }
   14773              : 
   14774            0 :       return opt_result::failure_at (stmt,
   14775              :                                      "not vectorized: irregular stmt: %G", stmt);
   14776              :     }
   14777              : 
   14778      5766316 :   tree vectype;
   14779      5766316 :   tree scalar_type = NULL_TREE;
   14780      5766316 :   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
   14781              :     {
   14782      1578986 :       vectype = STMT_VINFO_VECTYPE (stmt_info);
   14783      1578986 :       if (dump_enabled_p ())
   14784        79463 :         dump_printf_loc (MSG_NOTE, vect_location,
   14785              :                          "precomputed vectype: %T\n", vectype);
   14786              :     }
   14787      4187330 :   else if (vect_use_mask_type_p (stmt_info))
   14788              :     {
   14789       194502 :       unsigned int precision = stmt_info->mask_precision;
   14790       194502 :       scalar_type = build_nonstandard_integer_type (precision, 1);
   14791       194502 :       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
   14792       194502 :       if (!vectype)
   14793            0 :         return opt_result::failure_at (stmt, "not vectorized: unsupported"
   14794              :                                        " data-type %T\n", scalar_type);
   14795       194502 :       if (dump_enabled_p ())
   14796         4737 :         dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
   14797              :     }
   14798              :   else
   14799              :     {
   14800              :       /* If we got here with a gcond it means that the target had no available vector
   14801              :          mode for the scalar type.  We can't vectorize so abort.  */
   14802      3992828 :       if (is_a <gcond *> (stmt))
   14803            0 :         return opt_result::failure_at (stmt,
   14804              :                                        "not vectorized:"
   14805              :                                        " unsupported data-type for gcond %T\n",
   14806              :                                        scalar_type);
   14807              : 
   14808      3992828 :       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
   14809      1456736 :         scalar_type = TREE_TYPE (DR_REF (dr));
   14810              :       else
   14811      2536092 :         scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
   14812              : 
   14813      3992828 :       if (dump_enabled_p ())
   14814              :         {
   14815        62318 :           if (group_size)
   14816         7748 :             dump_printf_loc (MSG_NOTE, vect_location,
   14817              :                              "get vectype for scalar type (group size %d):"
   14818              :                              " %T\n", group_size, scalar_type);
   14819              :           else
   14820        54570 :             dump_printf_loc (MSG_NOTE, vect_location,
   14821              :                              "get vectype for scalar type: %T\n", scalar_type);
   14822              :         }
   14823      3992828 :       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
   14824      3992828 :       if (!vectype)
   14825       207005 :         return opt_result::failure_at (stmt,
   14826              :                                        "not vectorized:"
   14827              :                                        " unsupported data-type %T\n",
   14828              :                                        scalar_type);
   14829              : 
   14830      3785823 :       if (dump_enabled_p ())
   14831        62119 :         dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
   14832              :     }
   14833              : 
   14834      4059788 :   if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
   14835            0 :     return opt_result::failure_at (stmt,
   14836              :                                    "not vectorized: vector stmt in loop:%G",
   14837              :                                    stmt);
   14838              : 
   14839      5559311 :   *stmt_vectype_out = vectype;
   14840              : 
   14841              :   /* Don't try to compute scalar types if the stmt produces a boolean
   14842              :      vector; use the existing vector type instead.  */
   14843      5559311 :   tree nunits_vectype = vectype;
   14844      5559311 :   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
   14845              :     {
   14846              :       /* The number of units is set according to the smallest scalar
   14847              :          type (or the largest vector size, but we only support one
   14848              :          vector size per vectorization).  */
   14849      5044044 :       scalar_type = vect_get_smallest_scalar_type (stmt_info,
   14850      5044044 :                                                    TREE_TYPE (vectype));
   14851      5044044 :       if (!types_compatible_p (scalar_type, TREE_TYPE (vectype)))
   14852              :         {
   14853       982161 :           if (dump_enabled_p ())
   14854         8345 :             dump_printf_loc (MSG_NOTE, vect_location,
   14855              :                              "get vectype for smallest scalar type: %T\n",
   14856              :                              scalar_type);
   14857       982161 :           nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
   14858              :                                                         group_size);
   14859       982161 :           if (!nunits_vectype)
   14860           10 :             return opt_result::failure_at
   14861           10 :               (stmt, "not vectorized: unsupported data-type %T\n",
   14862              :                scalar_type);
   14863       982151 :           if (dump_enabled_p ())
   14864         8345 :             dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
   14865              :                              nunits_vectype);
   14866              :         }
   14867              :     }
   14868              : 
   14869      5559301 :   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
   14870      5559301 :                    TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
   14871            0 :     return opt_result::failure_at (stmt,
   14872              :                                    "Not vectorized: Incompatible number "
   14873              :                                    "of vector subparts between %T and %T\n",
   14874              :                                    nunits_vectype, *stmt_vectype_out);
   14875              : 
   14876      5559301 :   if (dump_enabled_p ())
   14877              :     {
   14878       146319 :       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
   14879       146319 :       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
   14880       146319 :       dump_printf (MSG_NOTE, "\n");
   14881              :     }
   14882              : 
   14883      5559301 :   *nunits_vectype_out = nunits_vectype;
   14884      5559301 :   return opt_result::success ();
   14885              : }
   14886              : 
   14887              : /* Generate and return a statement sequence that sets vector length LEN as follows:
   14888              : 
   14889              :    min_of_start_and_end = min (START_INDEX, END_INDEX);
   14890              :    left_len = END_INDEX - min_of_start_and_end;
   14891              :    rhs = min (left_len, LEN_LIMIT);
   14892              :    LEN = rhs;
   14893              : 
   14894              :    Note: the cost of the code generated by this function is modeled
   14895              :    by vect_estimate_min_profitable_iters, so changes here may need
   14896              :    corresponding changes there.  */
   14897              : 
   14898              : gimple_seq
   14899            0 : vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
   14900              : {
   14901            0 :   gimple_seq stmts = NULL;
   14902            0 :   tree len_type = TREE_TYPE (len);
   14903            0 :   gcc_assert (TREE_TYPE (start_index) == len_type);
   14904              : 
   14905            0 :   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
   14906            0 :   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
   14907            0 :   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
   14908            0 :   gimple* stmt = gimple_build_assign (len, rhs);
   14909            0 :   gimple_seq_add_stmt (&stmts, stmt);
   14910              : 
   14911            0 :   return stmts;
   14912              : }
   14913              : 
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.