LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.9 % 3128 2780
Test Date: 2026-06-20 15:32:29 Functions: 97.6 % 85 83
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Analysis Utilities for Loop Vectorization.
       2              :    Copyright (C) 2006-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "gimple-iterator.h"
      29              : #include "gimple-fold.h"
      30              : #include "ssa.h"
      31              : #include "expmed.h"
      32              : #include "optabs-tree.h"
      33              : #include "insn-config.h"
      34              : #include "recog.h"            /* FIXME: for insn_data */
      35              : #include "fold-const.h"
      36              : #include "stor-layout.h"
      37              : #include "tree-eh.h"
      38              : #include "gimplify.h"
      39              : #include "gimple-iterator.h"
      40              : #include "gimple-fold.h"
      41              : #include "gimplify-me.h"
      42              : #include "cfgloop.h"
      43              : #include "tree-vectorizer.h"
      44              : #include "dumpfile.h"
      45              : #include "builtins.h"
      46              : #include "internal-fn.h"
      47              : #include "case-cfn-macros.h"
      48              : #include "fold-const-call.h"
      49              : #include "attribs.h"
      50              : #include "cgraph.h"
      51              : #include "omp-simd-clone.h"
      52              : #include "predict.h"
      53              : #include "tree-vector-builder.h"
      54              : #include "tree-ssa-loop-ivopts.h"
      55              : #include "vec-perm-indices.h"
      56              : #include "gimple-range.h"
      57              : #include "alias.h"
      58              : 
      59              : 
      60              : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      61              :    in the first operand.  Disentangling this is future work, the
      62              :    IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
      63              : 
      64              : 
      65              : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      66              :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      67              : 
      68              : bool
      69     12266167 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      70              : {
      71     12266167 :   int_range_max vr;
      72     12266167 :   tree vr_min, vr_max;
      73     24532334 :   get_range_query (cfun)->range_of_expr (vr, var);
      74     12266167 :   if (vr.undefined_p ())  /* Fall back to the varying range of VAR's type.  */
      75           71 :     vr.set_varying (TREE_TYPE (var));
      76     12266167 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      77     12266167 :   *min_value = wi::to_wide (vr_min);  /* Both outputs are written unconditionally, i.e. also when false is returned.  */
      78     12266167 :   *max_value = wi::to_wide (vr_max);
      79     12266167 :   wide_int nonzero = get_nonzero_bits (var);
      80     12266167 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      81     12266167 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      82              :                                          nonzero, sgn) == VR_RANGE)  /* Only a VR_RANGE result counts as useful.  */
      83              :     {
      84      6091902 :       if (dump_enabled_p ())  /* Dump-only note; the return value does not depend on it.  */
      85              :         {
      86        88391 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      87        88391 :           dump_printf (MSG_NOTE, " has range [");
      88        88391 :           dump_hex (MSG_NOTE, *min_value);
      89        88391 :           dump_printf (MSG_NOTE, ", ");
      90        88391 :           dump_hex (MSG_NOTE, *max_value);
      91        88391 :           dump_printf (MSG_NOTE, "]\n");
      92              :         }
      93      6091902 :       return true;
      94              :     }
      95              :   else
      96              :     {
      97      6174265 :       if (dump_enabled_p ())  /* Dump-only note; the return value does not depend on it.  */
      98              :         {
      99        67618 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     100        67618 :           dump_printf (MSG_NOTE, " has no range info\n");
     101              :         }
     102      6174265 :       return false;
     103              :     }
     104     12266167 : }
     105              : 
     106              : /* Report that we've found an instance of the pattern named NAME in
     107              :    statement STMT.  Nothing is printed unless dumping is enabled.  */
     108              : 
     109              : static void
     110      1257245 : vect_pattern_detected (const char *name, gimple *stmt)
     111              : {
     112      1257245 :   if (dump_enabled_p ())
     113        25167 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     114      1257245 : }
     115              : 
     116              : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     117              :    return the pattern statement's stmt_vec_info.  Set its vector type to
     118              :    VECTYPE if it doesn't have one already.  */
     119              : 
     120              : static stmt_vec_info
     121      2427841 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     122              :                         stmt_vec_info orig_stmt_info, tree vectype)
     123              : {
     124      2427841 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     125      2427841 :   if (pattern_stmt_info == NULL)  /* PATTERN_STMT is not yet known to VINFO: register it.  */
     126      1299749 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     127      2427841 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));  /* Give PATTERN_STMT the original statement's block.  */
     128              : 
     129      2427841 :   pattern_stmt_info->pattern_stmt_p = true;
     130      2427841 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;  /* Link back to the statement being replaced.  */
     131      2427841 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     132      2427841 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     133      2427841 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))  /* A vector type that is already set is left untouched.  */
     134              :     {
     135      2196733 :       gcc_assert (!vectype
     136              :                   || is_a <gcond *> (pattern_stmt)
     137              :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     138              :                       == vect_use_mask_type_p (orig_stmt_info)));  /* Unless PATTERN_STMT is a gcond, a nonnull VECTYPE must be a boolean vector exactly when ORIG_STMT_INFO uses a mask type.  */
     139      1310464 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     140      1310464 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;  /* Copied from the original statement together with the vector type.  */
     141              :     }
     142      2427841 :   return pattern_stmt_info;
     143              : }
     144              : 
     145              : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     146              :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     147              :    have one already.  */
     148              : 
     149              : static void
     150      1034080 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     151              :                        stmt_vec_info orig_stmt_info, tree vectype)
     152              : {
     153      1034080 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;  /* Mark the original statement as being part of a pattern.  */
     154      1034080 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)  /* Point the original at the stmt_vec_info of its pattern statement.  */
     155            0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     156      1003092 : }
     157              : 
     158              : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     159              :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     160              :    be different from the vector type of the final pattern statement.
     161              :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     162              :    from which it was derived.  */
     163              : 
     164              : static inline void
     165      1352676 : append_pattern_def_seq (vec_info *vinfo,
     166              :                         stmt_vec_info stmt_info, gimple *new_stmt,
     167              :                         tree vectype = NULL_TREE,
     168              :                         tree scalar_type_for_mask = NULL_TREE)
     169              : {
     170      2065418 :   gcc_assert (!scalar_type_for_mask
     171              :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));  /* SCALAR_TYPE_FOR_MASK must be given exactly when VECTYPE is a boolean vector type.  */
     172      1352676 :   if (vectype)  /* Without a VECTYPE, NEW_STMT is only queued below and gets no stmt_vec_info here.  */
     173              :     {
     174      1117616 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     175      1117616 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     176      1117616 :       if (scalar_type_for_mask)
     177       639934 :         new_stmt_info->mask_precision  /* Bit size of the mode of the scalar type the mask was derived from.  */
     178      1279868 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     179              :     }
     180      1352676 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     181              :                                       new_stmt);  /* Always appended, with or without a VECTYPE.  */
     182      1352676 : }
     183              : 
     184              : 
     185              : /* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
     186              :    statements are not vectorized but are materialized as scalar in the loop
     187              :    preheader.  */
     188              : 
     189              : static inline void
     190         1383 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
     191              : {
     192         1383 :   gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);  /* The sequence lives on VINFO itself, not on an individual statement.  */
     193              : }
     194              : 
     195              : /* The caller wants to perform new operations on vect_external variable
     196              :    VAR, so that the result of the operations would also be vect_external.
     197              :    Return the edge on which the operations can be performed, if one exists.
     198              :    Return null if the operations should instead be treated as part of
     199              :    the pattern that needs them.  */
     200              : 
     201              : static edge
     202         8608 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     203              : {
     204         8608 :   edge e = NULL;
     205         8608 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))  /* Only a loop_vec_info offers a candidate edge; otherwise E stays null.  */
     206              :     {
     207          819 :       e = loop_preheader_edge (loop_vinfo->loop);
     208          819 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))  /* Default definitions skip the dominance check below.  */
     209              :         {
     210          623 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     211          623 :           if (bb == NULL
     212          623 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))  /* Usable only if VAR's defining block dominates the edge's destination.  */
     213              :             e = NULL;
     214              :         }
     215              :     }
     216         8608 :   return e;
     217              : }
     218              : 
     219              : /* Return true if the target supports a vector version of CODE,
     220              :    where CODE is known to map to a direct optab with the given SUBTYPE.
     221              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     222              :    specifies the type of the scalar result.
     223              : 
     224              :    If CODE allows the inputs and outputs to have different type
     225              :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     226              :    than the output mode that determines the appropriate target pattern.
     227              :    Operand 0 of the target pattern then specifies the mode that the output
     228              :    must have.
     229              : 
     230              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     231              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     232              :    is nonnull.  */
     233              : 
     234              : static bool
     235          813 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     236              :                                  tree itype, tree *vecotype_out,
     237              :                                  tree *vecitype_out = NULL,
     238              :                                  enum optab_subtype subtype = optab_default)
     239              : {
     240          813 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     241          813 :   if (!vecitype)  /* No vector type for ITYPE.  */
     242              :     return false;
     243              : 
     244          813 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     245          813 :   if (!vecotype)  /* No vector type for OTYPE.  */
     246              :     return false;
     247              : 
     248          813 :   optab optab = optab_for_tree_code (code, vecitype, subtype);  /* The optab is looked up with the input vector type.  */
     249          813 :   if (!optab)
     250              :     return false;
     251              : 
     252          813 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));  /* The handler is keyed on the input vector mode.  */
     253          813 :   if (icode == CODE_FOR_nothing
     254          813 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))  /* Operand 0 of the instruction must have the output vector mode.  */
     255          394 :     return false;
     256              : 
     257          419 :   *vecotype_out = vecotype;  /* The outputs are only written on success.  */
     258          419 :   if (vecitype_out)
     259          412 :     *vecitype_out = vecitype;
     260              :   return true;
     261              : }
     262              : 
     263              : /* Return true if the target supports a vector version of CODE,
     264              :    where CODE is known to map to a conversion optab with the given SUBTYPE.
     265              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     266              :    specifies the type of the scalar result.
     267              : 
     268              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     269              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     270              :    is nonnull.  */
     271              : 
     272              : static bool
     273         3667 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
     274              :                                  tree itype, tree *vecotype_out,
     275              :                                  tree *vecitype_out = NULL,
     276              :                                  enum optab_subtype subtype = optab_default)
     277              : {
     278         3667 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     279         3667 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     280         3667 :   if (!vecitype || !vecotype)  /* Both scalar types need a vector counterpart.  */
     281              :     return false;
     282              : 
     283         3405 :   if (!directly_supported_p (code, vecotype, vecitype, subtype))  /* Note the argument order: output vector type first, then input.  */
     284              :     return false;
     285              : 
     286          714 :   *vecotype_out = vecotype;  /* The outputs are only written on success.  */
     287          714 :   if (vecitype_out)
     288          714 :     *vecitype_out = vecitype;
     289              :   return true;
     290              : }
     291              : 
     292              : /* Round bit precision PRECISION up to a full element.  */
     293              : 
     294              : static unsigned int
     295      3250251 : vect_element_precision (unsigned int precision)
     296              : {
     297            0 :   precision = 1 << ceil_log2 (precision);  /* Round up to a power of two.  */
     298      4874654 :   return MAX (precision, BITS_PER_UNIT);  /* The result is never smaller than BITS_PER_UNIT.  */
     299              : }
     300              : 
     301              : /* If OP is defined by a statement that's being considered for vectorization,
     302              :    return information about that statement, otherwise return NULL.  */
     303              : 
     304              : static stmt_vec_info
     305       342487 : vect_get_internal_def (vec_info *vinfo, tree op)
     306              : {
     307       342487 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     308       342487 :   if (def_stmt_info
     309       329054 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)  /* Any other definition type yields NULL.  */
     310       313245 :     return vect_stmt_to_vectorize (def_stmt_info);  /* NOTE(review): presumably maps to the pattern statement when one exists -- confirm in tree-vectorizer.h.  */
     311              :   return NULL;
     312              : }
     313              : 
     314              : /* Holds information about an input operand after some sign changes
     315              :    and type promotions have been peeled away.  */
     316              : class vect_unpromoted_value {
     317              : public:
     318              :   vect_unpromoted_value ();  /* Leaves OP, TYPE and CASTER null and DT as vect_uninitialized_def.  */
     319              : 
     320              :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);  /* Sets OP, DT and CASTER; TYPE is taken from the new OP.  */
     321              : 
     322              :   /* The value obtained after peeling away zero or more casts.  */
     323              :   tree op;
     324              : 
     325              :   /* The type of OP.  */
     326              :   tree type;
     327              : 
     328              :   /* The definition type of OP.  */
     329              :   vect_def_type dt;
     330              : 
     331              :   /* If OP is the result of peeling at least one cast, and if the cast
     332              :      of OP itself is a vectorizable statement, CASTER identifies that
     333              :      statement, otherwise it is null.  */
     334              :   stmt_vec_info caster;
     335              : };
     336              : 
     337    294383156 : inline vect_unpromoted_value::vect_unpromoted_value ()  /* Start with no operand recorded.  */
     338    294383156 :   : op (NULL_TREE),
     339    294383156 :     type (NULL_TREE),
     340    294383156 :     dt (vect_uninitialized_def),
     341      3235332 :     caster (NULL)
     342              : {
     343              : }
     344              : 
     345              : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     346              :    statement that casts it to CASTER_IN.  */
     347              : 
     348              : inline void
     349     11382745 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     350              :                                stmt_vec_info caster_in)
     351              : {
     352     11382745 :   op = op_in;
     353     11382745 :   type = TREE_TYPE (op);  /* TYPE is always recomputed from the new OP, never passed in.  */
     354     11382745 :   dt = dt_in;
     355     11382745 :   caster = caster_in;
     356     11382745 : }
     357              : 
     358              : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     359              :    to reach some vectorizable inner operand OP', continuing as long as it
     360              :    is possible to convert OP' back to OP using a possible sign change
     361              :    followed by a possible promotion P.  Return this OP', or null if OP is
     362              :    not a vectorizable SSA name.  If there is a promotion P, describe its
     363              :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     364              :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     365              :    have more than one user.
     366              : 
     367              :    A successful return means that it is possible to go from OP' to OP
     368              :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     369              :    whereas the cast from UNPROM to OP might be a promotion, a sign
     370              :    change, or a nop.
     371              : 
     372              :    E.g. say we have:
     373              : 
     374              :        signed short *ptr = ...;
     375              :        signed short C = *ptr;
     376              :        unsigned short B = (unsigned short) C;    // sign change
     377              :        signed int A = (signed int) B;            // unsigned promotion
     378              :        ...possible other uses of A...
     379              :        unsigned int OP = (unsigned int) A;       // sign change
     380              : 
     381              :    In this case it's possible to go directly from C to OP using:
     382              : 
     383              :        OP = (unsigned int) (unsigned short) C;
     384              :             +------------+ +--------------+
     385              :                promotion      sign change
     386              : 
     387              :    so OP' would be C.  The input to the promotion is B, so UNPROM
     388              :    would describe B.  */
     389              : 
     390              : static tree
     391      8400130 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     392              :                                       vect_unpromoted_value *unprom,
     393              :                                       bool *single_use_p = NULL)
     394              : {
     395      8400130 :   tree op_type = TREE_TYPE (op);
     396      8400130 :   if (!INTEGRAL_TYPE_P (op_type))  /* Only integral operands are handled.  */
     397              :     return NULL_TREE;
     398              : 
     399      8348875 :   tree res = NULL_TREE;
     400      8348875 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     401      8348875 :   unsigned int min_precision = orig_precision;  /* Precision of the operand most recently recorded in UNPROM.  */
     402      8348875 :   stmt_vec_info caster = NULL;
     403     10010846 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))  /* Each iteration looks through one conversion.  */
     404              :     {
     405              :       /* See whether OP is simple enough to vectorize.  */
     406      9782387 :       stmt_vec_info def_stmt_info;
     407      9782387 :       gimple *def_stmt;
     408      9782387 :       vect_def_type dt;
     409      9782387 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     410              :         break;
     411              : 
     412              :       /* If OP is the input of a demotion, skip over it to see whether
     413              :          OP is itself the result of a promotion.  If so, the combined
     414              :          effect of the promotion and the demotion might fit the required
     415              :          pattern, otherwise neither operation fits.
     416              : 
     417              :          This copes with cases such as the result of an arithmetic
     418              :          operation being truncated before being stored, and where that
     419              :          arithmetic operation has been recognized as an over-widened one.  */
     420      9766201 :       if (TYPE_PRECISION (op_type) <= min_precision)  /* A wider OP is skipped here without updating RES or UNPROM.  */
     421              :         {
     422              :           /* Use OP as the UNPROM described above if we haven't yet
     423              :              found a promotion, or if using the new input preserves the
     424              :              sign of the previous promotion.  */
     425      9639994 :           if (!res
     426      1417367 :               || TYPE_PRECISION (unprom->type) == orig_precision
     427        38752 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
     428      9675937 :               || (TYPE_UNSIGNED (op_type)
     429        25429 :                   && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
     430              :             {
     431      9604481 :               unprom->set_op (op, dt, caster);
     432      9604481 :               min_precision = TYPE_PRECISION (op_type);
     433              :             }
     434              :           /* Stop if we've already seen a promotion and if this
     435              :              conversion does more than change the sign.  */
     436        35513 :           else if (TYPE_PRECISION (op_type)
     437        35513 :                    != TYPE_PRECISION (unprom->type))
     438              :             break;
     439              : 
     440              :           /* The sequence now extends to OP.  */
     441              :           res = op;
     442              :         }
     443              : 
     444              :       /* See whether OP is defined by a cast.  Record it as CASTER if
     445              :          the cast is potentially vectorizable.  */
     446      9766157 :       if (!def_stmt)  /* No defining statement was reported, so there is nothing further to look through.  */
     447              :         break;
     448      9565533 :       caster = def_stmt_info;
     449              : 
     450              :       /* Ignore pattern statements, since we don't link uses for them.  */
     451      9565533 :       if (caster
     452      9565533 :           && single_use_p
     453      1970517 :           && !STMT_VINFO_RELATED_STMT (caster)
     454     11388383 :           && !has_single_use (res))  /* The use count tested is that of RES, the last accepted operand.  */
     455      1096012 :         *single_use_p = false;
     456              : 
     457     17685949 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     458      6014223 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))  /* Only conversion assignments are looked through.  */
     459              :         break;
     460              : 
     461              :       /* Continue with the input to the cast.  */
     462      1661971 :       op = gimple_assign_rhs1 (def_stmt);
     463      1661971 :       op_type = TREE_TYPE (op);
     464              :     }
     465              :   return res;  /* Still NULL_TREE if no operand was accepted.  */
     466              : }
     467              : 
     468              : /* OP is an integer operand to an operation that returns TYPE, and we
     469              :    want to treat the operation as a widening one.  So far we can treat
     470              :    it as widening from *COMMON_TYPE.
     471              : 
     472              :    Return true if OP is suitable for such a widening operation,
     473              :    either widening from *COMMON_TYPE or from some supertype of it.
     474              :    Update *COMMON_TYPE to the supertype in the latter case.
     475              : 
     476              :    SHIFT_P is true if OP is a shift amount.  */
     477              : 
     478              : static bool
     479       300909 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     480              :                             tree *common_type)
     481              : {
     482              :   /* Calculate the minimum precision required by OP, without changing
     483              :      the sign of either operand.  */
     484       300909 :   unsigned int precision;
     485       300909 :   if (shift_p)
     486              :     {
     487        12988 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))  /* Reject shift amounts above half of TYPE's precision.  */
     488              :         return false;
     489        10419 :       precision = TREE_INT_CST_LOW (op);  /* For a shift, the amount itself is used as the required precision.  */
     490              :     }
     491              :   else
     492              :     {
     493       287921 :       precision = wi::min_precision (wi::to_widest (op),
     494       287921 :                                      TYPE_SIGN (*common_type));  /* Measured in the signedness of *COMMON_TYPE.  */
     495       287921 :       if (precision * 2 > TYPE_PRECISION (type))  /* OP must fit in at most half of TYPE's precision.  */
     496              :         return false;
     497              :     }
     498              : 
     499              :   /* If OP requires a wider type, switch to that type.  The checks
     500              :      above ensure that this is still narrower than the result.  */
     501       284662 :   precision = vect_element_precision (precision);
     502       284662 :   if (TYPE_PRECISION (*common_type) < precision)
     503         6441 :     *common_type = build_nonstandard_integer_type  /* The replacement type keeps the signedness of *COMMON_TYPE.  */
     504         6441 :       (precision, TYPE_UNSIGNED (*common_type));
     505              :   return true;
     506              : }
     507              : 
     508              : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     509              :    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
     510              : 
     511              : static bool
     512        45127 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     513              : {
     514        45127 :   if (types_compatible_p (*common_type, new_type))  /* Compatible types: *COMMON_TYPE is already suitable.  */
     515              :     return true;
     516              : 
     517              :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     518         7760 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     519         7760 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     520              :     return true;
     521              : 
     522              :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     523         7147 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     524         7147 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     525              :     {
     526          342 :       *common_type = new_type;
     527          342 :       return true;
     528              :     }
     529              : 
     530              :   /* We have mismatched signs, with the signed type being
     531              :      no wider than the unsigned type.  In this case we need
     532              :      a wider signed type.  */
     533         6805 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     534              :                                 TYPE_PRECISION (new_type));
     535         6805 :   precision *= 2;  /* Twice the wider of the two input precisions.  */
     536              : 
     537         6805 :   if (precision * 2 > TYPE_PRECISION (type))  /* The new type must be at most half as wide as TYPE.  */
     538              :     return false;
     539              : 
     540           43 :   *common_type = build_nonstandard_integer_type (precision, false);  /* FALSE requests the signed type mentioned above.  */
     541           43 :   return true;
     542              : }
     543              : 
     544              : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     545              :    in which each node either performs CODE or WIDENED_CODE, and where
     546              :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     547              :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     548              :    CODE and WIDENED_CODE are some sort of shift.
     549              : 
     550              :    If STMT_INFO is such a tree, return the number of leaf operands
     551              :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     552              :    to a type that (a) is narrower than the result of STMT_INFO and
     553              :    (b) can hold all leaf operand values.
     554              : 
     555              :    If SUBTYPE is nonnull, operands whose signs cannot be reconciled by
     556              :    vect_joust_widened_type are still accepted: *COMMON_TYPE becomes the
     557              :    wider of the two types and *SUBTYPE is set to optab_vector_mixed_sign.
     558              : 
     559              :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     560              :    exists.  */
     561              : 
     562              : static unsigned int
     563    124081592 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     564              :                       code_helper widened_code, bool shift_p,
     565              :                       unsigned int max_nops,
     566              :                       vect_unpromoted_value *unprom, tree *common_type,
     567              :                       enum optab_subtype *subtype = NULL)
     568              : {
     569              :   /* Check for an integer operation with the right code.  */
     570    124081592 :   gimple* stmt = stmt_info->stmt;
     571    124081592 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     572              :     return 0;
     573              : 
     574    100220392 :   code_helper rhs_code;
     575    100220392 :   if (is_gimple_assign (stmt))
     576     85924664 :     rhs_code = gimple_assign_rhs_code (stmt);
     577     14295728 :   else if (is_gimple_call (stmt))
     578     14295728 :     rhs_code = gimple_call_combined_fn (stmt);
     579              :   else
     580              :     return 0;
     581              : 
     582    100220392 :   if (rhs_code != code
     583    100220392 :       && rhs_code != widened_code)
     584              :     return 0;
     585              : 
     586      6323555 :   tree lhs = gimple_get_lhs (stmt);
     587      6323555 :   tree type = TREE_TYPE (lhs);
     588      6323555 :   if (!INTEGRAL_TYPE_P (type))
     589              :     return 0;
     590              : 
     591              :   /* Assume that both operands will be leaf operands (this presumes MAX_NOPS >= 2, as it is unsigned -- TODO confirm).  */
     592      5742334 :   max_nops -= 2;
     593              : 
     594              :   /* Check the operands.  */
     595      5742334 :   unsigned int next_op = 0;
     596      6491833 :   for (unsigned int i = 0; i < 2; ++i)
     597              :     {
     598      6168383 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     599      6168383 :       unsigned int nops = 1;
     600      6168383 :       tree op = gimple_arg (stmt, i);
     601      6168383 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     602              :         {
     603              :           /* We already have a common type from earlier operands.
     604              :              Update it to account for OP.  */
     605       300909 :           this_unprom->set_op (op, vect_constant_def);
     606       300909 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     607              :             return 0;
     608              :         }
     609              :       else
     610              :         {
     611              :           /* Only allow shifts by constants.  */
     612      5867474 :           if (shift_p && i == 1)
     613              :             return 0;
     614              : 
     615      5861413 :           if (rhs_code != code)
     616              :             {
     617              :               /* If rhs_code is widened_code, don't look through further
     618              :                  possible promotions, there is a promotion already embedded
     619              :                  in the WIDEN_*_EXPR.  */
     620         1662 :               if (TREE_CODE (op) != SSA_NAME
     621         1662 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     622            0 :                 return 0;
     623              : 
     624         1662 :               stmt_vec_info def_stmt_info;
     625         1662 :               gimple *def_stmt;
     626         1662 :               vect_def_type dt;
     627         1662 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     628              :                                        &def_stmt))
     629              :                 return 0;
     630         1662 :               this_unprom->set_op (op, dt, NULL);
     631              :             }
     632      5859751 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     633              :                                                           this_unprom))
     634              :             return 0;
     635              : 
     636      5739273 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     637              :             {
     638              :               /* The operand isn't widened.  If STMT_INFO has the code
     639              :                  for an unwidened operation, recursively check whether
     640              :                  this operand is a node of the tree.  */
     641      5263907 :               if (rhs_code != code
     642      5263907 :                   || max_nops == 0
     643      5264373 :                   || this_unprom->dt != vect_internal_def)
     644              :                 return 0;
     645              : 
     646              :               /* Give back the leaf slot allocated above now that we're
     647              :                  not treating this as a leaf operand.  */
     648          466 :               max_nops += 1;
     649              : 
     650              :               /* Recursively process the definition of the operand.  */
     651          466 :               stmt_vec_info def_stmt_info
     652          466 :                 = vect_get_internal_def (vinfo, this_unprom->op);
     653              : 
     654          466 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     655              :                                            widened_code, shift_p, max_nops,
     656              :                                            this_unprom, common_type,
     657              :                                            subtype);
     658          466 :               if (nops == 0)
     659              :                 return 0;
     660              : 
     661          311 :               max_nops -= nops;
     662              :             }
     663              :           else
     664              :             {
     665              :               /* Make sure that the operand is narrower than the result.  */
     666       475366 :               if (TYPE_PRECISION (this_unprom->type) * 2
     667       475366 :                   > TYPE_PRECISION (type))
     668              :                 return 0;
     669              : 
     670              :               /* Update COMMON_TYPE for the new operand.  */
     671       471003 :               if (i == 0)
     672       425876 :                 *common_type = this_unprom->type;
     673        45127 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     674              :                                                  common_type))
     675              :                 {
     676         6762 :                   if (subtype)
     677              :                     {
     678              :                       /* See if we can sign extend the smaller type.  */
     679          285 :                       if (TYPE_PRECISION (this_unprom->type)
     680          285 :                           > TYPE_PRECISION (*common_type))
     681           27 :                         *common_type = this_unprom->type;
     682          285 :                       *subtype = optab_vector_mixed_sign;
     683              :                     }
     684              :                   else
     685              :                     return 0;
     686              :                 }
     687              :             }
     688              :         }
     689       749499 :       next_op += nops;
     690              :     }
     691              :   return next_op;
     692              : }
     693              : 
     694              : /* Return a new temporary SSA name "patt" of type TYPE for pattern stmt STMT.
     695              :    If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the result.  */
     696              : 
     697              : static tree
     698      2063422 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     699              : {
     700            0 :   return make_temp_ssa_name (type, stmt, "patt");
     701              : }
     702              : 
     703              : /* STMT2_INFO describes a type conversion that could be split into STMT1
     704              :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     705              :    input.  Try to do this using pattern statements, with VECTYPE as the
     706              :    vector type for STMT1.  Return true on success.  */
     707              : 
     708              : static bool
     709        31448 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     710              :                       gimple *stmt1, tree vectype)
     711              : {
     712        31448 :   if (is_pattern_stmt_p (stmt2_info))
     713              :     {
     714              :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     715              :          the pattern is attached.  */
     716          460 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     717          460 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     718              : 
     719          460 :       if (dump_enabled_p ())
     720           19 :         dump_printf_loc (MSG_NOTE, vect_location,
     721              :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     722              : 
     723              :       /* Since STMT2_INFO is a pattern statement, we can change it
     724              :          in-situ without worrying about changing the code for the
     725              :          containing block.  */
     726          460 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     727              : 
     728          460 :       if (dump_enabled_p ())
     729              :         {
     730           19 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     731           19 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     732              :                            stmt2_info->stmt);
     733              :         }
     734              : 
     735          460 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     736          460 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     737              :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     738              :            to the end of the definition sequence.  */
     739          457 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     740              :       else
     741              :         {
     742              :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     743              :              before it.  */
     744            3 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     745            3 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     746              :         }
     747          460 :       return true;
     748              :     }
     749              :   else
     750              :     {
     751              :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     752              :          two-statement pattern now.  */
     753        30988 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     754        30988 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     755        30988 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     756        30988 :       if (!lhs_vectype)
     757              :         return false;
     758              : 
     759        30988 :       if (dump_enabled_p ())
     760         1927 :         dump_printf_loc (MSG_NOTE, vect_location,
     761              :                          "Splitting statement: %G", stmt2_info->stmt);
     762              : 
     763              :       /* Add STMT1 as a singleton pattern definition sequence.  */
     764        30988 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     765        30988 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     766        30988 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     767              : 
     768              :       /* Build the second of the two pattern statements.  */
     769        30988 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     770        30988 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     771        30988 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     772              : 
     773        30988 :       if (dump_enabled_p ())
     774              :         {
     775         1927 :           dump_printf_loc (MSG_NOTE, vect_location,
     776              :                            "into pattern statements: %G", stmt1);
     777         1927 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     778              :                            (gimple *) new_stmt2);
     779              :         }
     780              : 
     781        30988 :       return true;
     782              :     }
     783              : }
     784              : 
     785              : /* Look for the following pattern
     786              :         X = x[i]
     787              :         Y = y[i]
     788              :         DIFF = X - Y
     789              :         DAD = ABS_EXPR<DIFF>
     790              : 
     791              :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     792              :    HALF_TYPE and UNPROM will be set should the statement be found to
     793              :    be a widened operation.
     794              :    If DIFF_STMT is nonnull, *DIFF_STMT is set to the statement feeding
     795              :    ABS_STMT when it is a MINUS_EXPR whose result type has undefined overflow.
     796              :  */
     797              : static bool
     798     21107533 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     799              :                                 tree *half_type,
     800              :                                 vect_unpromoted_value unprom[2],
     801              :                                 gassign **diff_stmt)
     802              : {
     803     21107533 :   if (!abs_stmt)
     804              :     return false;
     805              : 
     806              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     807              :      inside the loop (in case we are analyzing an outer-loop).  */
     808     21107533 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     809     21107533 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     810              :     return false;
     811              : 
     812        24381 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     813        24381 :   tree abs_type = TREE_TYPE (abs_oprnd);
     814        24381 :   if (!abs_oprnd)  /* NOTE(review): TREE_TYPE above already read ABS_OPRND, so this check comes too late to guard it.  */
     815              :     return false;
     816        16861 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     817         7816 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     818        32049 :       || TYPE_UNSIGNED (abs_type))
     819              :     return false;
     820              : 
     821              :   /* Peel off conversions from the ABS input.  This can involve sign
     822              :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     823              :      or signed promotion, but it can't include unsigned promotion.
     824              :      (Note that ABS of an unsigned promotion should have been folded
     825              :      away before now anyway.)  */
     826         7668 :   vect_unpromoted_value unprom_diff;
     827         7668 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     828              :                                                     &unprom_diff);
     829         7668 :   if (!abs_oprnd)
     830              :     return false;
     831         7372 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     832         7372 :       && TYPE_UNSIGNED (unprom_diff.type))
     833              :     return false;
     834              : 
     835              :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     836         7372 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     837         7372 :   if (!diff_stmt_vinfo)
     838              :     return false;
     839              : 
     840         7201 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     841         7201 :   if (diff_stmt && diff
     842         5765 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     843         9306 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     844          275 :     *diff_stmt = diff;
     845              : 
     846              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     847              :      inside the loop (in case we are analyzing an outer-loop).  */
     848         7201 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     849         7201 :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     850              :                             false, 2, unprom, half_type))
     851              :     return true;
     852              : 
     853              :   return false;
     854              : }
     855              : 
     856              : /* Convert UNPROM to TYPE and return the result, adding new statements
     857              :    to STMT_INFO's pattern definition statements if no better way is
     858              :    available.  VECTYPE is the vector form of TYPE.  If SUBTYPE is
     859              :    optab_vector_mixed_sign, TYPE must be signed and an unsigned UNPROM
     860              :    is converted to the unsigned variant of TYPE instead.  */
     861              : 
     862              : static tree
     863       474920 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     864              :                     vect_unpromoted_value *unprom, tree vectype,
     865              :                     enum optab_subtype subtype = optab_default)
     866              : {
     867              :   /* Update the type if the signs differ.  */
     868       474920 :   if (subtype == optab_vector_mixed_sign)
     869              :     {
     870          322 :       gcc_assert (!TYPE_UNSIGNED (type));
     871          322 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     872              :         {
     873          161 :           type = unsigned_type_for (type);
     874          161 :           vectype = unsigned_type_for (vectype);
     875              :         }
     876              :     }
     877              : 
     878              :   /* Check for a no-op conversion.  */
     879       474920 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     880       163517 :     return unprom->op;
     881              : 
     882              :   /* Allow the caller to create constant vect_unpromoted_values.  */
     883       311403 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     884       190828 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     885              : 
     886       120575 :   tree input = unprom->op;
     887       120575 :   if (unprom->caster)
     888              :     {
     889        65635 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     890        65635 :       tree lhs_type = TREE_TYPE (lhs);
     891              : 
     892              :       /* If the result of the existing cast is the right width, use it
     893              :          instead of the source of the cast.  */
     894        65635 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     895              :         input = lhs;
     896              :       /* If the precision we want is between the source and result
     897              :          precisions of the existing cast, try splitting the cast into
     898              :          two and tapping into a mid-way point.  */
     899        63573 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     900        63573 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     901              :         {
     902              :           /* In order to preserve the semantics of the original cast,
     903              :              give the mid-way point the same signedness as the input value.
     904              : 
     905              :              It would be possible to use a signed type here instead if
     906              :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     907              :              make the sign of the midtype sensitive to the order in
     908              :              which we process the statements, since the signedness of
     909              :              TYPE is the signedness required by just one of possibly
     910              :              many users.  Also, unsigned promotions are usually as cheap
     911              :              as or cheaper than signed ones, so it's better to keep an
     912              :              unsigned promotion.  */
     913        31448 :           tree midtype = build_nonstandard_integer_type
     914        31448 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     915        31448 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     916        31448 :           if (vec_midtype)
     917              :             {
     918        31448 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     919        31448 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     920              :                                                        unprom->op);
     921        31448 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     922              :                                          vec_midtype))
     923            0 :                 append_pattern_def_seq (vinfo, stmt_info,
     924              :                                         new_stmt, vec_midtype);
     925              :             }
     926              :         }
     927              : 
     928              :       /* See if we can reuse an existing result.  */
     929        65635 :       if (types_compatible_p (type, TREE_TYPE (input)))
     930              :         return input;
     931              :     }
     932              : 
     933              :   /* We need a new conversion statement.  */
     934        97771 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     935        97771 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     936              : 
     937              :   /* If OP is an external value, see if we can insert the new statement
     938              :      on an incoming edge.  */
     939        97771 :   if (input == unprom->op && unprom->dt == vect_external_def)
     940         8593 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     941              :       {
     942          804 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     943          804 :         gcc_assert (!new_bb);
     944              :         return new_op;
     945              :       }
     946              : 
     947              :   /* As a (common) last resort, add the statement to the pattern itself.  */
     948        96967 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     949        96967 :   return new_op;
     950              : }
     951              : 
     952              : /* Invoke vect_convert_input for N elements of UNPROM and store the
     953              :    result in the corresponding elements of RESULT.  An element whose op
     954              :    matches an earlier element reuses that element's result.  SUBTYPE is
     955              :    passed through to vect_convert_input.  */
     956              : 
     957              : static void
     958       241353 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     959              :                      tree *result, tree type, vect_unpromoted_value *unprom,
     960              :                      tree vectype, enum optab_subtype subtype = optab_default)
     961              : {
     962       716244 :   for (unsigned int i = 0; i < n; ++i)
     963              :     {
     964              :       unsigned int j;
     965       707958 :       for (j = 0; j < i; ++j)
     966       233538 :         if (unprom[j].op == unprom[i].op)
     967              :           break;
     968              : 
     969       474891 :       if (j < i)
     970          471 :         result[i] = result[j];
     971              :       else
     972       474420 :         result[i] = vect_convert_input (vinfo, stmt_info,
     973       474420 :                                         type, &unprom[i], vectype, subtype);
     974              :     }
     975       241353 : }
     976              : 
     977              : /* The caller has created a (possibly empty) sequence of pattern definition
     978              :    statements followed by a single statement PATTERN_STMT.  Cast the result
     979              :    of this final statement to TYPE.  If a new statement is needed, add
     980              :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     981              :    and return the new (not yet added) cast, otherwise return PATTERN_STMT
     982              :    as-is.  VECITYPE is the vector form of PATTERN_STMT's result type.  */
     983              : 
     984              : static gimple *
     985       268082 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     986              :                      gimple *pattern_stmt, tree vecitype)
     987              : {
     988       268082 :   tree lhs = gimple_get_lhs (pattern_stmt);
     989       268082 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
     990              :     {
     991       240628 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
     992       240628 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     993       240628 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
     994              :     }
     995       268082 :   return pattern_stmt;
     996              : }
     997              : 
     998              : /* Return true if STMT_INFO describes a reduction for which reassociation
     999              :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1000              :    a reduction chain and optimistically assume that all statements
    1001              :    except the last allow reassociation.  Also require it to have code CODE
    1002              :    and to be a reduction in the outermost loop.  When returning true, store
    1003              :    the operands in *OP0_OUT and *OP1_OUT, swapping them if CODE is
    1004              :    commutative and STMT_VINFO_REDUC_IDX (STMT_INFO) is 0.  */
    1005              : 
    1006              : static bool
    1007     92551743 : vect_reassociating_reduction_p (vec_info *vinfo,
    1008              :                                 stmt_vec_info stmt_info, tree_code code,
    1009              :                                 tree *op0_out, tree *op1_out)
    1010              : {
    1011     92551743 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1012     13424703 :   if (!loop_info)
    1013              :     return false;
    1014              : 
    1015     13424703 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1016     14660064 :   if (!assign || gimple_assign_rhs_code (assign) != code)
    1017              :     return false;
    1018              : 
    1019              :   /* We don't allow changing the order of the computation in the inner-loop
    1020              :      when doing outer-loop vectorization.  */
    1021      2626808 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1022     95019687 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1023              :     return false;
    1024              : 
    1025      2573696 :   if (!vect_is_reduction (stmt_info))
    1026              :     return false;
    1027              : 
    1028       170423 :   if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1029       170423 :                                    code))
    1030              :     return false;
    1031              : 
    1032       158864 :   *op0_out = gimple_assign_rhs1 (assign);
    1033       158864 :   *op1_out = gimple_assign_rhs2 (assign);
    1034       158864 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1035        63702 :     std::swap (*op0_out, *op1_out);
    1036              :   return true;
    1037              : }
    1038              : 
    1039              : /* Return true iff the target has a vector optab implementing the operation
    1040              :    CODE on type VECTYPE with SUBTYPE (optab_vector by default).  */
    1041              : 
    1042              : static bool
    1043       842991 : target_has_vecop_for_code (tree_code code, tree vectype,
    1044              :                            enum optab_subtype subtype = optab_vector)
    1045              : {
    1046       842991 :   optab voptab = optab_for_tree_code (code, vectype, subtype);
    1047       842991 :   return voptab && can_implement_p (voptab, TYPE_MODE (vectype));
    1048              : }
    1049              : 
    1050              : /* match.pd function to match
    1051              :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1052              :    with conditions:
    1053              :    1) @1, @2, c, d, a, b all have integral type.
    1054              :    2) Both @1 and @2 have a single use.
    1055              :    3) a and c have the same precision.
    1056              :    4) c and @1 have different precisions.
    1057              :    5) c and d have the same type, or they can differ in sign when the
    1058              :       convert is a truncation.
    1059              : 
    1060              :    Records a, c, d and @3.  */
    1061              : 
    1062              : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1063              : 
    1064              : /* Function vect_recog_cond_expr_convert
    1065              : 
    1066              :    Try to find the following pattern:
    1067              : 
    1068              :    TYPE_AB A,B;
    1069              :    TYPE_CD C,D;
    1070              :    TYPE_E E;
    1071              :    TYPE_E op_true = (TYPE_E) A;
    1072              :    TYPE_E op_false = (TYPE_E) B;
    1073              : 
    1074              :    E = C cmp D ? op_true : op_false;
    1075              : 
    1076              :    where
    1077              :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1078              :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1079              :    single_use of op_true and op_false.
    1080              :    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
    1081              : 
    1082              :    Input:
    1083              : 
    1084              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1085              :    here it starts with E = C cmp D ? op_true : op_false;
    1086              : 
    1087              :    Output:
    1088              : 
    1089              :    TYPE1 E' = C cmp D ? A : B;
    1090              :    TYPE3 E = (TYPE3) E';
    1091              : 
    1092              :    There may be an extra nop_convert for A or B to handle different signedness.
    1093              : 
    1094              :    * TYPE_OUT: The vector type of the output of this pattern.
    1095              : 
    1096              :    * Return value: A new stmt that will be used to replace the sequence of
    1097              :    stmts that constitute the pattern. In this case it will be:
    1098              :    E = (TYPE3)E';
    1099              :    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    1100              : 
    1101              : static gimple *
    1102     30922576 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1103              :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1104              : {
    1105     30922576 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1106     21199158 :   tree lhs, match[4], temp, type, new_lhs, op2, op1;
    1107     21199158 :   gimple *cond_stmt;
    1108     21199158 :   gimple *pattern_stmt;
    1109     30922547 :   enum tree_code code = NOP_EXPR;
    1110              : 
    1111     21199158 :   if (!last_stmt)
    1112              :     return NULL;
    1113              : 
    1114     21199158 :   lhs = gimple_assign_lhs (last_stmt);
    1115              : 
    1116              :   /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
    1117              :      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
    1118     21199158 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1119              :     return NULL;
    1120              : 
    1121           29 :   if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    1122           20 :     code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
    1123            9 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
    1124            0 :     code = FIX_TRUNC_EXPR;
    1125              : 
    1126           29 :   op1 = match[1];
    1127           29 :   op2 = match[2];
    1128           29 :   type = TREE_TYPE (op1);
    1129              :   /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
    1130              :      SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
    1131              :      Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
    1132              :      or CONVERT_EXPR.  */
    1133           29 :   if (TREE_CODE (op1) == REAL_CST)
    1134              :     {
    1135           20 :       op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
    1136           20 :       type = TREE_TYPE (op2);
    1137           20 :       if (op1 == NULL_TREE)
    1138              :         return NULL;
    1139              :     }
    1140            9 :   else if (TREE_CODE (op2) == REAL_CST)
    1141              :     {
    1142            0 :       op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
    1143            0 :       if (op2 == NULL_TREE)
    1144              :         return NULL;
    1145              :     }
    1146            9 :   else if (code == NOP_EXPR)
    1147              :     {
    1148            9 :       if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    1149              :         {
    1150            9 :           op2 = vect_recog_temp_ssa_var (type, NULL);
    1151            9 :           gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1152            9 :           append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt);
    1153              :         }
    1154              :     }
    1155              : 
    1156           29 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1157              : 
    1158           29 :   temp = vect_recog_temp_ssa_var (type, NULL);
    1159           29 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1160              :                                                  op1, op2));
    1161           29 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt);
    1162           29 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1163           29 :   pattern_stmt = gimple_build_assign (new_lhs, code, temp);
    1164           29 :   *type_out = NULL_TREE;
    1165              : 
    1166           29 :   if (dump_enabled_p ())
    1167           20 :     dump_printf_loc (MSG_NOTE, vect_location,
    1168              :                      "created pattern stmt: %G", pattern_stmt);
    1169              :   return pattern_stmt;
    1170              : }
    1171              : 
    1172              : /* Function vect_recog_dot_prod_pattern
    1173              : 
    1174              :    Try to find the following pattern:
    1175              : 
    1176              :      type1a x_t
    1177              :      type1b y_t;
    1178              :      TYPE1 prod;
    1179              :      TYPE2 sum = init;
    1180              :    loop:
    1181              :      sum_0 = phi <init, sum_1>
    1182              :      S1  x_t = ...
    1183              :      S2  y_t = ...
    1184              :      S3  x_T = (TYPE1) x_t;
    1185              :      S4  y_T = (TYPE1) y_t;
    1186              :      S5  prod = x_T * y_T;
    1187              :      [S6  prod = (TYPE2) prod;  #optional]
    1188              :      S7  sum_1 = prod + sum_0;
    1189              : 
    1190              :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1191              :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1192              :    'type1a' and 'type1b' can differ.
    1193              : 
    1194              :    Input:
    1195              : 
    1196              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1197              :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1198              :    will be detected.
    1199              : 
    1200              :    Output:
    1201              : 
    1202              :    * TYPE_OUT: The type of the output of this pattern.
    1203              : 
    1204              :    * Return value: A new stmt that will be used to replace the sequence of
    1205              :    stmts that constitute the pattern. In this case it will be:
    1206              :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1207              : 
    1208              :    Note: The dot-prod idiom is a widening reduction pattern that is
    1209              :          vectorized without preserving all the intermediate results. It
    1210              :          produces only N/2 (widened) results (by summing up pairs of
    1211              :          intermediate results) rather than all N results.  Therefore, we
    1212              :          cannot allow this pattern when we want to get all the results and in
    1213              :          the correct order (as is the case when this computation is in an
    1214              :          inner-loop nested in an outer-loop that is being vectorized).  */
    1215              : 
    1216              : static gimple *
    1217     30851191 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1218              :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1219              : {
    1220     30851191 :   tree oprnd0, oprnd1;
    1221     30851191 :   gimple *last_stmt = stmt_vinfo->stmt;
    1222     30851191 :   tree type, half_type;
    1223     30851191 :   gimple *pattern_stmt;
    1224     30851191 :   tree var;
    1225              : 
    1226              :   /* Look for the following pattern
    1227              :           DX = (TYPE1) X;
    1228              :           DY = (TYPE1) Y;
    1229              :           DPROD = DX * DY;
    1230              :           DDPROD = (TYPE2) DPROD;
    1231              :           sum_1 = DDPROD + sum_0;
    1232              :      In which
    1233              :      - DX is double the size of X
    1234              :      - DY is double the size of Y
    1235              :      - DX, DY, DPROD all have the same type but the sign
    1236              :        between X, Y and DPROD can differ.
    1237              :      - sum is the same size of DPROD or bigger
    1238              :      - sum has been recognized as a reduction variable.
    1239              : 
    1240              :      This is equivalent to:
    1241              :        DPROD = X w* Y;          #widen mult
    1242              :        sum_1 = DPROD w+ sum_0;  #widen summation
    1243              :      or
    1244              :        DPROD = X w* Y;          #widen mult
    1245              :        sum_1 = DPROD + sum_0;   #summation
    1246              :    */
    1247              : 
    1248              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1249              :      of the above pattern.  */
    1250              : 
    1251     30851191 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1252              :                                        &oprnd0, &oprnd1))
    1253              :     return NULL;
    1254              : 
    1255        53568 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1256              : 
    1257        53568 :   vect_unpromoted_value unprom_mult;
    1258        53568 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1259              : 
    1260              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1261              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1262              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1263              :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1264        53568 :   if (!oprnd0)
    1265              :     return NULL;
    1266              : 
    1267        36285 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1268        36285 :   if (!mult_vinfo)
    1269              :     return NULL;
    1270              : 
    1271              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
    1272              :      inside the loop (in case we are analyzing an outer-loop).  */
    1273       106320 :   vect_unpromoted_value unprom0[2];
    1274        35440 :   enum optab_subtype subtype = optab_vector;
    1275        35440 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1276              :                              false, 2, unprom0, &half_type, &subtype))
    1277              :     return NULL;
    1278              : 
    1279              :   /* If there are two widening operations, make sure they agree on the sign
    1280              :      of the extension.  The result of an optab_vector_mixed_sign operation
    1281              :      is signed; otherwise, the result has the same sign as the operands.  */
    1282         1363 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1283         2085 :       && (subtype == optab_vector_mixed_sign
    1284          722 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1285          519 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1286              :     return NULL;
    1287              : 
    1288         1282 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1289              : 
    1290              :   /* If the inputs have mixed signs, canonicalize on using the signed
    1291              :      input type for analysis.  This also helps when emulating mixed-sign
    1292              :      operations using signed operations.  */
    1293         1282 :   if (subtype == optab_vector_mixed_sign)
    1294          240 :     half_type = signed_type_for (half_type);
    1295              : 
    1296         1282 :   tree half_vectype;
    1297         1282 :   if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1298              :                                         type_out, &half_vectype, subtype))
    1299              :     {
    1300              :       /* We can emulate a mixed-sign dot-product using a sequence of
    1301              :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1302          583 :       if (subtype != optab_vector_mixed_sign
    1303          583 :           || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
    1304              :                                                DOT_PROD_EXPR, half_type,
    1305              :                                                type_out, &half_vectype,
    1306              :                                                optab_vector))
    1307          568 :         return NULL;
    1308              : 
    1309           15 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1310              :                                                *type_out);
    1311              :     }
    1312              : 
    1313              :   /* Get the inputs in the appropriate types.  */
    1314          714 :   tree mult_oprnd[2];
    1315          714 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1316              :                        unprom0, half_vectype, subtype);
    1317              : 
    1318          714 :   var = vect_recog_temp_ssa_var (type, NULL);
    1319          714 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1320              :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1321              : 
    1322          714 :   return pattern_stmt;
    1323              : }
    1324              : 
    1325              : 
    1326              : /* Function vect_recog_sad_pattern
    1327              : 
    1328              :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1329              : 
    1330              :      type x_t, y_t;
    1331              :      signed TYPE1 diff, abs_diff;
    1332              :      TYPE2 sum = init;
    1333              :    loop:
    1334              :      sum_0 = phi <init, sum_1>
    1335              :      S1  x_t = ...
    1336              :      S2  y_t = ...
    1337              :      S3  x_T = (TYPE1) x_t;
    1338              :      S4  y_T = (TYPE1) y_t;
    1339              :      S5  diff = x_T - y_T;
    1340              :      S6  abs_diff = ABS_EXPR <diff>;
    1341              :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1342              :      S8  sum_1 = abs_diff + sum_0;
    1343              : 
    1344              :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1345              :    same size of 'TYPE1' or bigger. This is a special case of a reduction
    1346              :    computation.
    1347              : 
    1348              :    Input:
    1349              : 
    1350              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1351              :    example, when this function is called with S8, the pattern
    1352              :    {S3,S4,S5,S6,S7,S8} will be detected.
    1353              : 
    1354              :    Output:
    1355              : 
    1356              :    * TYPE_OUT: The type of the output of this pattern.
    1357              : 
    1358              :    * Return value: A new stmt that will be used to replace the sequence of
    1359              :    stmts that constitute the pattern. In this case it will be:
    1360              :         SAD_EXPR <x_t, y_t, sum_0>
    1361              :   */
    1362              : 
    1363              : static gimple *
    1364     30850482 : vect_recog_sad_pattern (vec_info *vinfo,
    1365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1366              : {
    1367     30850482 :   gimple *last_stmt = stmt_vinfo->stmt;
    1368     30850482 :   tree half_type;
    1369              : 
    1370              :   /* Look for the following pattern
    1371              :           DX = (TYPE1) X;
    1372              :           DY = (TYPE1) Y;
    1373              :           DDIFF = DX - DY;
    1374              :           DAD = ABS_EXPR <DDIFF>;
    1375              :           DDAD = (TYPE2) DAD;
    1376              :           sum_1 = DDAD + sum_0;
    1377              :      In which
    1378              :      - DX is at least double the size of X
    1379              :      - DY is at least double the size of Y
    1380              :      - DX, DY, DDIFF, DAD all have the same type
    1381              :      - sum is the same size of DAD or bigger
    1382              :      - sum has been recognized as a reduction variable.
    1383              : 
    1384              :      This is equivalent to:
    1385              :        DDIFF = X w- Y;          #widen sub
    1386              :        DAD = ABS_EXPR <DDIFF>;
    1387              :        sum_1 = DAD w+ sum_0;    #widen summation
    1388              :      or
    1389              :        DDIFF = X w- Y;          #widen sub
    1390              :        DAD = ABS_EXPR <DDIFF>;
    1391              :        sum_1 = DAD + sum_0;     #summation
    1392              :    */
    1393              : 
    1394              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1395              :      of the above pattern.  */
    1396              : 
    1397     30850482 :   tree plus_oprnd0, plus_oprnd1;
    1398     30850482 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1399              :                                        &plus_oprnd0, &plus_oprnd1))
    1400              :     return NULL;
    1401              : 
    1402        52854 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1403              : 
    1404              :   /* Any non-truncating sequence of conversions is OK here, since
    1405              :      with a successful match, the result of the ABS(U) is known to fit
    1406              :      within the nonnegative range of the result type.  (It cannot be the
    1407              :      negative of the minimum signed value due to the range of the widening
    1408              :      MINUS_EXPR.)  */
    1409        52854 :   vect_unpromoted_value unprom_abs;
    1410        52854 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1411              :                                                       &unprom_abs);
    1412              : 
    1413              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1414              :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1415              :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1416              :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1417              : 
    1418        52854 :   if (!plus_oprnd0)
    1419              :     return NULL;
    1420              : 
    1421        35571 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1422        35571 :   if (!abs_stmt_vinfo)
    1423              :     return NULL;
    1424              : 
    1425              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
    1426              :      inside the loop (in case we are analyzing an outer-loop).  */
    1427        34726 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1428       104178 :   vect_unpromoted_value unprom[2];
    1429              : 
    1430        34726 :   if (!abs_stmt)
    1431              :     {
    1432     30850372 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1433          302 :       if (!abd_stmt
    1434          302 :           || !gimple_call_internal_p (abd_stmt)
    1435            0 :           || gimple_call_num_args (abd_stmt) != 2)
    1436              :         return NULL;
    1437              : 
    1438            0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1439            0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1440              : 
    1441            0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
    1442            0 :           || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1443              :         {
    1444            0 :           unprom[0].op = abd_oprnd0;
    1445            0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1446            0 :           unprom[1].op = abd_oprnd1;
    1447            0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1448              :         }
    1449              :       else
    1450              :         return NULL;
    1451              : 
    1452            0 :       half_type = unprom[0].type;
    1453              :     }
    1454        34359 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1455              :                                             unprom, NULL))
    1456              :     return NULL;
    1457              : 
    1458          806 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1459              : 
    1460          806 :   tree half_vectype;
    1461          806 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1462              :                                         type_out, &half_vectype))
    1463              :     return NULL;
    1464              : 
    1465              :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1466          412 :   tree sad_oprnd[2];
    1467          412 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1468              :                        unprom, half_vectype);
    1469              : 
    1470          412 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1471          412 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1472              :                                               sad_oprnd[1], plus_oprnd1);
    1473              : 
    1474          412 :   return pattern_stmt;
    1475              : }
    1476              : 
    1477              : /* Function vect_recog_abd_pattern
    1478              : 
    1479              :    Try to find the following ABsolute Difference (ABD) or
    1480              :    widening ABD (WIDEN_ABD) pattern:
    1481              : 
    1482              :    TYPE1 x;
    1483              :    TYPE2 y;
    1484              :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1485              :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1486              :    TYPE3 diff = x_cast - y_cast;
    1487              :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1488              :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1489              : 
    1490              :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1491              :    twice as wide as TYPE3.
    1492              : 
    1493              :    Input:
    1494              : 
    1495              :    * STMT_VINFO: The stmt from which the pattern search begins
    1496              : 
    1497              :    Output:
    1498              : 
    1499              :    * TYPE_OUT: The type of the output of this pattern
    1500              : 
    1501              :    * Return value: A new stmt that will be used to replace the sequence of
    1502              :      stmts that constitute the pattern, principally:
    1503              :         out = IFN_ABD (x, y)
    1504              :         out = IFN_VEC_WIDEN_ABD (x, y)
    1505              :  */
    1506              : 
    1507              : static gimple *
    1508     30796434 : vect_recog_abd_pattern (vec_info *vinfo,
    1509              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1510              : {
    1511     51869608 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1512     21073174 :   if (!last_stmt)
    1513              :     return NULL;
    1514              : 
    1515     21073174 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1516              : 
    1517     63219522 :   vect_unpromoted_value unprom[2];
    1518     21073174 :   gassign *diff_stmt = NULL;
    1519     21073174 :   tree abd_in_type;
    1520     21073174 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
    1521              :                                        unprom, &diff_stmt))
    1522              :     {
    1523              :       /* We cannot try further without having a non-widening MINUS.  */
    1524     21071560 :       if (!diff_stmt)
    1525              :         return NULL;
    1526              : 
    1527          275 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1528          275 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1529          275 :       abd_in_type = signed_type_for (out_type);
    1530              :     }
    1531              : 
    1532         1889 :   tree abd_out_type = abd_in_type;
    1533              : 
    1534         1889 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1535         1889 :   if (!vectype_in)
    1536              :     return NULL;
    1537              : 
    1538         1872 :   internal_fn ifn = IFN_ABD;
    1539         1872 :   tree vectype_out = vectype_in;
    1540              : 
    1541         1872 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1542         1872 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1543              :     {
    1544         1505 :       tree mid_type
    1545         1505 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1546         1505 :                                           TYPE_UNSIGNED (abd_in_type));
    1547         1505 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1548              : 
    1549         1505 :       code_helper dummy_code;
    1550         1505 :       int dummy_int;
    1551         1505 :       auto_vec<tree> dummy_vec;
    1552         1505 :       if (mid_vectype
    1553         1505 :           && supportable_widening_operation (IFN_VEC_WIDEN_ABD,
    1554              :                                              mid_vectype, vectype_in, false,
    1555              :                                              &dummy_code, &dummy_code,
    1556              :                                              &dummy_int, &dummy_vec))
    1557              :         {
    1558            0 :           ifn = IFN_VEC_WIDEN_ABD;
    1559            0 :           abd_out_type = mid_type;
    1560            0 :           vectype_out = mid_vectype;
    1561              :         }
    1562         1505 :     }
    1563              : 
    1564         1505 :   if (ifn == IFN_ABD
    1565         1872 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1566              :                                           OPTIMIZE_FOR_SPEED))
    1567              :     return NULL;
    1568              : 
    1569            0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1570              : 
    1571            0 :   tree abd_oprnds[2];
    1572            0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1573              :                        abd_in_type, unprom, vectype_in);
    1574              : 
    1575            0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1576              : 
    1577            0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1578            0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1579              :                                                 abd_oprnds[0], abd_oprnds[1]);
    1580            0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1581            0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1582              : 
    1583            0 :   gimple *stmt = abd_stmt;
    1584            0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1585            0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1586            0 :       && !TYPE_UNSIGNED (abd_out_type))
    1587              :     {
    1588            0 :       tree unsign = unsigned_type_for (abd_out_type);
    1589            0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1590            0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1591              :     }
    1592              : 
    1593            0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1594              : }
    1595              : 
    1596              : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1597              :    so that it can be treated as though it had the form:
    1598              : 
    1599              :       A_TYPE a;
    1600              :       B_TYPE b;
    1601              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1602              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1603              :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1604              :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1605              :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1606              : 
    1607              :    Try to replace the pattern with:
    1608              : 
    1609              :       A_TYPE a;
    1610              :       B_TYPE b;
    1611              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1612              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1613              :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1614              :     | RES_TYPE res = (RES_TYPE) ext;  // possible no-op
    1615              : 
    1616              :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1617              : 
    1618              :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1619              :    name of the pattern being matched, for dump purposes.
                      : 
                      :    EXT_TYPE is RES_TYPE when RES_TYPE already has twice the precision
                      :    and the same signedness as HALF_TYPE; otherwise it is a nonstandard
                      :    integer type built with those properties.  For MINUS_EXPR with an
                      :    unsigned EXT_TYPE narrower than RES_TYPE, EXT is first converted to
                      :    the signed type of the same precision so that the final conversion
                      :    sign-extends.
                      : 
                      :    On success, store the vector type for RES_TYPE in *TYPE_OUT and
                      :    return the statement produced by the final vect_convert_output call.
                      :    Return NULL if LAST_STMT_INFO does not match, if a required vector
                      :    type is unavailable, or if the target support check fails.  */
    1620              : 
    1621              : static gimple *
    1622    124008541 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1623              :                              stmt_vec_info last_stmt_info, tree *type_out,
    1624              :                              tree_code orig_code, code_helper wide_code,
    1625              :                              bool shift_p, const char *name)
    1626              : {
    1627    124008541 :   gimple *last_stmt = last_stmt_info->stmt;
    1628              : 
    1629    372025623 :   vect_unpromoted_value unprom[2];
    1630    124008541 :   tree half_type;
    1631    124008541 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1632              :                              shift_p, 2, unprom, &half_type))
    1633              : 
    1634              :     return NULL;
    1635              : 
    1636              :   /* Pattern detected.  */
    1637       317196 :   vect_pattern_detected (name, last_stmt);
    1638              : 
    1639       317196 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt)); /* RES_TYPE.  */
    1640       317196 :   tree itype = type; /* EXT_TYPE: type of the WIDE_CODE result.  */
    1641       317196 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1642       317196 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1643       223261 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1644       223261 :                                             TYPE_UNSIGNED (half_type));
    1645              : 
    1646              :   /* Check target support  */
    1647       317196 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1648       317196 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1649       317196 :   tree ctype = itype; /* Type EXT is converted to before RES_TYPE.  */
    1650       317196 :   tree vecctype = vecitype;
    1651       317196 :   if (orig_code == MINUS_EXPR
    1652         9485 :       && TYPE_UNSIGNED (itype)
    1653       321692 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1654              :     {
    1655              :       /* Subtraction is special, even if half_type is unsigned and no matter
    1656              :          whether type is signed or unsigned, if type is wider than itype,
    1657              :          we need to sign-extend from the widening operation result to the
    1658              :          result type.
    1659              :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1660              :          itype unsigned short and type either int or unsigned int.
    1661              :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1662              :          (unsigned short) 0xffff, but for type int we want the result -1
    1663              :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1664          694 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0); /* 0: not unsigned.  */
    1665          694 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1666              :     }
    1667              : 
    1668       317196 :   code_helper dummy_code; /* The dummy_* outputs below are never read.  */
    1669       317196 :   int dummy_int;
    1670       317196 :   auto_vec<tree> dummy_vec;
    1671       317196 :   if (!vectype
    1672       317196 :       || !vecitype
    1673       247046 :       || !vecctype
    1674       564242 :       || !supportable_widening_operation (wide_code, vecitype, vectype, true,
    1675              :                                           &dummy_code, &dummy_code,
    1676              :                                           &dummy_int, &dummy_vec))
    1677       210796 :     return NULL;
    1678              : 
    1679       106400 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1680       106400 :   if (!*type_out)
    1681              :     return NULL;
    1682              : 
    1683       106400 :   tree oprnd[2];
    1684       106400 :   vect_convert_inputs (vinfo, last_stmt_info,
    1685              :                        2, oprnd, half_type, unprom, vectype);
    1686              : 
    1687       106400 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1688       106400 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1689              : 
    1690       106400 :   if (vecctype != vecitype) /* Can only differ after the MINUS_EXPR adjustment above.  */
    1691            0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1692              :                                         pattern_stmt, vecitype);
    1693              : 
    1694       106400 :   return vect_convert_output (vinfo, last_stmt_info,
    1695       106400 :                               type, pattern_stmt, vecctype);
    1696       317196 : }
    1697              : 
    1698              : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1699              :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.
                      : 
                      :    VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged, SHIFT_P
                      :    is passed as false, and the result of vect_recog_widen_op_pattern is
                      :    returned as is.  */
    1700              : 
    1701              : static gimple *
    1702     30875925 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1703              :                                tree *type_out)
    1704              : {
    1705     30875925 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1706     30875925 :                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
    1707     30875925 :                                       "vect_recog_widen_mult_pattern");
    1708              : }
    1709              : 
    1710              : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1711              :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.
                      : 
                      :    VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged, SHIFT_P
                      :    is passed as false, and the result of vect_recog_widen_op_pattern is
                      :    returned as is.  */
    1712              : 
    1713              : static gimple *
    1714     31141001 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1715              :                                tree *type_out)
    1716              : {
    1717     31141001 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1718     31141001 :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
    1719     31141001 :                                       false, "vect_recog_widen_plus_pattern");
    1720              : }
    1721              : 
    1722              : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1723              :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details,
                      :    including the extra sign-extending conversion it applies to
                      :    MINUS_EXPR.
                      : 
                      :    VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged, SHIFT_P
                      :    is passed as false, and the result of vect_recog_widen_op_pattern is
                      :    returned as is.  */
    1724              : static gimple *
    1725     31141001 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1726              :                                tree *type_out)
    1727              : {
    1728     31141001 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1729     31141001 :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
    1730     31141001 :                                       false, "vect_recog_widen_minus_pattern");
    1731              : }
    1732              : 
    1733              : /* Try to detect abd on widened inputs, converting IFN_ABD
    1734              :    to IFN_VEC_WIDEN_ABD.
                      : 
                      :    STMT_VINFO must be a conversion from an unsigned integral type to an
                      :    integral type of exactly twice its precision.  The converted operand,
                      :    after looking through any promotion, must keep the precision of the
                      :    input type and be defined by a statement known to VINFO whose
                      :    statement is a call to IFN_ABD.
                      : 
                      :    On success, store the vector type for the conversion result in
                      :    *TYPE_OUT and return a new call to IFN_VEC_WIDEN_ABD that takes the
                      :    two IFN_ABD arguments, defines a fresh temporary of the result type
                      :    and carries the location of the conversion.  Return NULL if the
                      :    statement does not match, if either vector type is unavailable, or
                      :    if supportable_widening_operation rejects IFN_VEC_WIDEN_ABD.  */
    1735              : static gimple *
    1736     31141001 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1737              :                               tree *type_out)
    1738              : {
    1739     31141001 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1740     28845243 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1741              :     return NULL;
    1742              : 
    1743      3035471 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1744              : 
    1745      3035471 :   tree in_type = TREE_TYPE (last_rhs);
    1746      3035471 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1747      3035471 :   if (!INTEGRAL_TYPE_P (in_type)
    1748      2723614 :       || !INTEGRAL_TYPE_P (out_type)
    1749      2607087 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1750      3658679 :       || !TYPE_UNSIGNED (in_type))
    1751              :     return NULL;
    1752              : 
    1753       217073 :   vect_unpromoted_value unprom;
    1754       217073 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1755       217073 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1756              :     return NULL;
    1757              : 
    1758       214564 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1759       214564 :   if (!abd_pattern_vinfo)
    1760              :     return NULL;
    1761              : 
    1762     31150165 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1763         9164 :   if (!abd_stmt
    1764         9164 :       || !gimple_call_internal_p (abd_stmt)
    1765          265 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1766              :     return NULL;
    1767              : 
    1768            0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1769            0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
                      :   /* Either lookup can fail (the other recognizers in this file check
                      :      for that); do not hand a null vector type to the support query or
                      :      publish one through *TYPE_OUT.  */
                      :   if (!vectype_in || !vectype_out)
                      :     return NULL;
    1770              : 
    1771            0 :   code_helper dummy_code; /* The dummy_* outputs below are never read.  */
    1772            0 :   int dummy_int;
    1773            0 :   auto_vec<tree> dummy_vec;
    1774            0 :   if (!supportable_widening_operation (IFN_VEC_WIDEN_ABD, vectype_out,
    1775              :                                        vectype_in, false,
    1776              :                                        &dummy_code, &dummy_code,
    1777              :                                        &dummy_int, &dummy_vec))
    1778              :     return NULL;
    1779              : 
    1780            0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1781              : 
    1782            0 :   *type_out = vectype_out;
    1783              : 
    1784            0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1785            0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1786            0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1787            0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1788              :                                                       abd_oprnd0, abd_oprnd1);
    1789            0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1790            0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1791            0 :   return widen_abd_stmt;
    1792            0 : }
    1793              : 
    1794              : /* Function vect_recog_ctz_ffs_pattern
    1795              : 
    1796              :    Try to find the following pattern:
    1797              : 
    1798              :    TYPE1 A;
    1799              :    TYPE1 B;
    1800              : 
    1801              :    B = __builtin_ctz{,l,ll} (A);
    1802              : 
    1803              :    or
    1804              : 
    1805              :    B = __builtin_ffs{,l,ll} (A);
    1806              : 
    1807              :    Input:
    1808              : 
    1809              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1810              :    here it starts with B = __builtin_* (A);
    1811              : 
    1812              :    Output:
    1813              : 
    1814              :    * TYPE_OUT: The vector type of the output of this pattern.
    1815              : 
    1816              :    * Return value: A new stmt that will be used to replace the sequence of
    1817              :    stmts that constitute the pattern, built on .CLZ, .POPCOUNT or (for ffs)
                      :    .CTZ, or NULL if the pattern does not apply.  */
    1818              : 
    1819              : static gimple *
    1820     30850421 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1821              :                             tree *type_out)
    1822              : {
    1823     30850421 :   gimple *call_stmt = stmt_vinfo->stmt;
    1824     30850421 :   gimple *pattern_stmt;
    1825     30850421 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1826     30850421 :   tree new_var;
    1827     30850421 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST; /* Matched function and its replacement.  */
    1828     30850421 :   bool defined_at_zero = true, defined_at_zero_new = false; /* For IFN resp. IFNNEW.  */
    1829     30850421 :   int val = 0, val_new = 0, val_cmp = 0; /* Results for a zero input: wanted, from IFNNEW, after SUB/ADD.  */
    1830     30850421 :   int prec;
    1831     30850421 :   int sub = 0, add = 0; /* If nonzero: result is SUB - call, resp. call + ADD.  */
    1832     30850421 :   location_t loc;
    1833              : 
    1834     30850421 :   if (!is_gimple_call (call_stmt))
    1835              :     return NULL;
    1836              : 
    1837      3577157 :   if (gimple_call_num_args (call_stmt) != 1
    1838      3577157 :       && gimple_call_num_args (call_stmt) != 2)
    1839              :     return NULL;
    1840              : 
    1841      1995561 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1842      1995561 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1843      1995561 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1844      1995561 :   if (!lhs_oprnd)
    1845              :     return NULL;
    1846       975241 :   lhs_type = TREE_TYPE (lhs_oprnd);
    1847       975241 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1848       327291 :       || !INTEGRAL_TYPE_P (rhs_type)
    1849        44566 :       || !type_has_mode_precision_p (rhs_type)
    1850      1018220 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1851       944462 :     return NULL;
    1852              : 
    1853        30779 :   switch (gimple_call_combined_fn (call_stmt))
    1854              :     {
    1855         1554 :     CASE_CFN_CTZ:
    1856         1554 :       ifn = IFN_CTZ;
    1857         1554 :       if (!gimple_call_internal_p (call_stmt)
    1858         1554 :           || gimple_call_num_args (call_stmt) != 2)
    1859              :         defined_at_zero = false; /* Only the 2-argument internal call supplies a value at zero.  */
    1860              :       else
    1861          121 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1862              :       break;
    1863              :     CASE_CFN_FFS:
    1864              :       ifn = IFN_FFS; /* Keeps defined_at_zero == true and val == 0.  */
    1865              :       break;
    1866              :     default:
    1867              :       return NULL;
    1868              :     }
    1869              : 
    1870         1789 :   prec = TYPE_PRECISION (rhs_type);
    1871         1789 :   loc = gimple_location (call_stmt);
    1872              : 
    1873         1789 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1874         1789 :   if (!vec_type)
    1875              :     return NULL;
    1876              : 
    1877         1783 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1878         1783 :   if (!vec_rhs_type)
    1879              :     return NULL;
    1880              : 
    1881              :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1882              :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1883              :      popcount<vector_mode>2.  */
    1884         1544 :   if (!vec_type /* NOTE(review): already checked non-null above, so this test looks redundant.  */
    1885         1544 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1886              :                                          OPTIMIZE_FOR_SPEED))
    1887              :     return NULL;
    1888              : 
    1889         1544 :   if (ifn == IFN_FFS /* Replacement preference: .CTZ (ffs only), then .CLZ.  */
    1890         1544 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1891              :                                          OPTIMIZE_FOR_SPEED))
    1892              :     {
    1893            0 :       ifnnew = IFN_CTZ;
    1894            0 :       defined_at_zero_new
    1895            0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1896              :                                      val_new) == 2;
    1897              :     }
    1898         1544 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1899              :                                            OPTIMIZE_FOR_SPEED))
    1900              :     {
    1901          160 :       ifnnew = IFN_CLZ;
    1902          160 :       defined_at_zero_new
    1903          160 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1904              :                                      val_new) == 2;
    1905              :     }
    1906          160 :   if ((ifnnew == IFN_LAST /* Use .POPCOUNT if nothing was chosen, or a value at zero is wanted but not provided.  */
    1907          160 :        || (defined_at_zero && !defined_at_zero_new))
    1908         1384 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1909              :                                          OPTIMIZE_FOR_SPEED))
    1910              :     {
    1911              :       ifnnew = IFN_POPCOUNT;
    1912              :       defined_at_zero_new = true;
    1913              :       val_new = prec;
    1914              :     }
    1915         1418 :   if (ifnnew == IFN_LAST)
    1916              :     return NULL;
    1917              : 
    1918          286 :   vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt); /* NOTE(review): name is spelled "vec_", unlike this function ("vect_"); confirm whether intended.  */
    1919              : 
    1920          286 :   val_cmp = val_new;
    1921          286 :   if ((ifnnew == IFN_CLZ
    1922          286 :        && defined_at_zero
    1923          106 :        && defined_at_zero_new
    1924          106 :        && val == prec
    1925           54 :        && val_new == prec)
    1926          232 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1927              :     {
    1928          137 :       if (vect_is_reduction (stmt_vinfo))
    1929              :         return NULL;
    1930              : 
    1931              :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1932              :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1933          137 :       if (ifnnew == IFN_CLZ)
    1934           54 :         sub = prec;
    1935          137 :       val_cmp = prec;
    1936              : 
    1937          137 :       if (!TYPE_UNSIGNED (rhs_type)) /* Do the bit manipulation below in the unsigned variant of the type.  */
    1938              :         {
    1939           12 :           rhs_type = unsigned_type_for (rhs_type);
    1940           12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type); /* NOTE(review): not checked for NULL, unlike the lookups above.  */
    1941           12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1942           12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1943           12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1944              :                                   vec_rhs_type);
    1945           12 :           rhs_oprnd = new_var;
    1946              :         }
    1947              : 
    1948          137 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL); /* m1 = X - 1.  */
    1949          137 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1950              :                                           build_int_cst (rhs_type, -1));
    1951          137 :       gimple_set_location (pattern_stmt, loc);
    1952          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1953              : 
    1954          137 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL); /* ~X.  */
    1955          137 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1956          137 :       gimple_set_location (pattern_stmt, loc);
    1957          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1958          137 :       rhs_oprnd = new_var;
    1959              : 
    1960          137 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL); /* (X - 1) & ~X.  */
    1961          137 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1962              :                                           m1, rhs_oprnd);
    1963          137 :       gimple_set_location (pattern_stmt, loc);
    1964          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1965          137 :       rhs_oprnd = new_var;
    1966          137 :     }
    1967          149 :   else if (ifnnew == IFN_CLZ)
    1968              :     {
    1969          106 :       if (vect_is_reduction (stmt_vinfo))
    1970              :         return NULL;
    1971              : 
    1972              :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1973              :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1974          106 :       sub = prec - (ifn == IFN_CTZ);
    1975          106 :       val_cmp = sub - val_new;
    1976              : 
    1977          106 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); /* -X.  */
    1978          106 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1979          106 :       gimple_set_location (pattern_stmt, loc);
    1980          106 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1981              : 
    1982          106 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL); /* X & -X.  */
    1983          106 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1984              :                                           rhs_oprnd, neg);
    1985          106 :       gimple_set_location (pattern_stmt, loc);
    1986          106 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1987          106 :       rhs_oprnd = new_var;
    1988              :     }
    1989           43 :   else if (ifnnew == IFN_POPCOUNT)
    1990              :     {
    1991           43 :       if (vect_is_reduction (stmt_vinfo))
    1992              :         return NULL;
    1993              : 
    1994              :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1995              :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
    1996           43 :       sub = prec + (ifn == IFN_FFS);
    1997           43 :       val_cmp = sub;
    1998              : 
    1999           43 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); /* -X.  */
    2000           43 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    2001           43 :       gimple_set_location (pattern_stmt, loc);
    2002           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2003              : 
    2004           43 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL); /* X | -X.  */
    2005           43 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    2006              :                                           rhs_oprnd, neg);
    2007           43 :       gimple_set_location (pattern_stmt, loc);
    2008           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2009           43 :       rhs_oprnd = new_var;
    2010              :     }
    2011            0 :   else if (ifnnew == IFN_CTZ)
    2012              :     {
    2013              :       /* .FFS (X) = .CTZ (X) + 1.  */
    2014            0 :       add = 1;
    2015            0 :       val_cmp++;
    2016              : 
    2017            0 :       if (vect_is_reduction (stmt_vinfo) /* Same condition as the zero-input fix-up emitted below.  */
    2018            0 :           && defined_at_zero
    2019            0 :           && (!defined_at_zero_new || val != val_cmp))
    2020              :         return NULL;
    2021              :     }
    2022              : 
    2023              :   /* Create B = .IFNNEW (A).  */
    2024          286 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2025          286 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new) /* Pass VAL_NEW as the second call argument.  */
    2026          160 :     pattern_stmt
    2027          160 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    2028              :                                     build_int_cst (integer_type_node,
    2029          160 :                                                    val_new));
    2030              :   else
    2031          126 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    2032          286 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2033          286 :   gimple_set_location (pattern_stmt, loc);
    2034          286 :   *type_out = vec_type;
    2035              : 
    2036          286 :   if (sub) /* Result = SUB - call result; the call becomes part of the def sequence.  */
    2037              :     {
    2038          203 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2039          203 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2040          203 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2041          203 :                                           build_int_cst (lhs_type, sub),
    2042              :                                           new_var);
    2043          203 :       gimple_set_location (pattern_stmt, loc);
    2044          203 :       new_var = ret_var;
    2045              :     }
    2046           83 :   else if (add) /* Result = call result + ADD.  */
    2047              :     {
    2048            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2049            0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2050            0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2051            0 :                                           build_int_cst (lhs_type, add));
    2052            0 :       gimple_set_location (pattern_stmt, loc);
    2053            0 :       new_var = ret_var;
    2054              :     }
    2055              : 
    2056          286 :   if (defined_at_zero /* Zero-input fix-up: select VAL when the original argument is zero.  */
    2057          210 :       && (!defined_at_zero_new || val != val_cmp))
    2058              :     {
    2059           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2060           43 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2061           43 :       rhs_oprnd = gimple_call_arg (call_stmt, 0); /* Re-read: rhs_oprnd and rhs_type may have been replaced above.  */
    2062           43 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2063           43 :       tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2064           43 :       pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
    2065              :                                           build_zero_cst (rhs_type)); /* NOTE(review): no gimple_set_location here or on the COND_EXPR, unlike the other statements built above.  */
    2066           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    2067              :                               truth_type_for (vec_type), rhs_type);
    2068           43 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2069              :                                           new_var,
    2070           43 :                                           build_int_cst (lhs_type, val));
    2071              :     }
    2072              : 
    2073          286 :   if (dump_enabled_p ())
    2074           36 :     dump_printf_loc (MSG_NOTE, vect_location,
    2075              :                      "created pattern stmt: %G", pattern_stmt);
    2076              : 
    2077              :   return pattern_stmt;
    2078              : }
    2079              : 
    2080              : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2081              : 
    2082              :    Try to find the following pattern:
    2083              : 
    2084              :    UTYPE1 A;
    2085              :    TYPE1 B;
    2086              :    UTYPE2 temp_in;
    2087              :    TYPE3 temp_out;
    2088              :    temp_in = (UTYPE2)A;
    2089              : 
    2090              :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2091              :    B = (TYPE1) temp_out;
    2092              : 
    2093              :    TYPE2 may or may not be equal to TYPE3.
    2094              :    E.g. TYPE2 is equal to TYPE3 for __builtin_popcount,
    2095              :    but TYPE2 is not equal to TYPE3 for __builtin_popcountll.
    2096              : 
    2097              :    Input:
    2098              : 
    2099              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2100              :    here it starts with B = (TYPE1) temp_out;
    2101              : 
    2102              :    Output:
    2103              : 
    2104              :    * TYPE_OUT: The vector type of the output of this pattern.
    2105              : 
    2106              :    * Return value: A new stmt that will be used to replace the sequence of
    2107              :    stmts that constitute the pattern. In this case it will be:
    2108              :    B = .POPCOUNT (A);
    2109              : 
    2110              :    Similarly for clz, ctz and ffs.
    2111              : */
    2112              : 
    2113              : static gimple *
    2114     30850055 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2115              :                                          stmt_vec_info stmt_vinfo,
    2116              :                                          tree *type_out)
    2117              : {
    2118     30850055 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2119     21126511 :   gimple *call_stmt, *pattern_stmt;
    2120     21126511 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2121     51976248 :   internal_fn ifn = IFN_LAST;
    2122     30849737 :   int addend = 0;  /* Added to the .CLZ result when temp_in is wider than A.  */
    2123              : 
    2124              :   /* Find B = (TYPE1) temp_out, a conversion to an integral type.  */
    2125     21126511 :   if (!last_stmt)
    2126              :     return NULL;
    2127     21126511 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2128     21126511 :   if (!CONVERT_EXPR_CODE_P (code))
    2129              :     return NULL;
    2130              : 
    2131      2904627 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2132      2904627 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2133      2904627 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2134              :     return NULL;
    2135              : 
    2136      2721699 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2137      2721699 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2138      2721699 :       || !has_single_use (rhs_oprnd))
    2139              :     return NULL;
    2140      1392660 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2141              : 
    2142              :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in), or the clz, ctz or ffs equivalent.  */
    2143      1392660 :   if (!is_gimple_call (call_stmt))
    2144              :     return NULL;
    2145        99582 :   switch (gimple_call_combined_fn (call_stmt))
    2146              :     {
    2147              :       int val;  /* Receives the value at zero from the *_DEFINED_VALUE_AT_ZERO macros.  */
    2148              :     CASE_CFN_POPCOUNT:
    2149              :       ifn = IFN_POPCOUNT;
    2150              :       break;
    2151         2059 :     CASE_CFN_CLZ:
    2152         2059 :       ifn = IFN_CLZ;
    2153              :       /* Punt if call result is unsigned and defined value at zero
    2154              :          is negative, as the negative value doesn't extend correctly.  */
    2155         2059 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2156            0 :           && gimple_call_internal_p (call_stmt)
    2157         2059 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2158              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2159         2059 :           && val < 0)
    2160              :         return NULL;
    2161              :       break;
    2162          706 :     CASE_CFN_CTZ:
    2163          706 :       ifn = IFN_CTZ;
    2164              :       /* Punt if call result is unsigned and defined value at zero
    2165              :          is negative, as the negative value doesn't extend correctly.  */
    2166          706 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2167            0 :           && gimple_call_internal_p (call_stmt)
    2168          706 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2169              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2170          706 :           && val < 0)
    2171              :         return NULL;
    2172              :       break;
    2173           57 :     CASE_CFN_FFS:
    2174           57 :       ifn = IFN_FFS;
    2175           57 :       break;
    2176              :     default:
    2177              :       return NULL;
    2178              :     }
    2179              : 
    2180         3124 :   if (gimple_call_num_args (call_stmt) != 1
    2181         3124 :       && gimple_call_num_args (call_stmt) != 2)
    2182              :     return NULL;
    2183              : 
    2184         3124 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2185         3124 :   vect_unpromoted_value unprom_diff;
    2186         3124 :   rhs_origin
    2187         3124 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2188              : 
    2189         3124 :   if (!rhs_origin)
    2190              :     return NULL;
    2191              : 
    2192              :   /* Input and output of the internal function should be same-precision integers.  */
    2193         3114 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2194              :     return NULL;
    2195              : 
    2196              :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2197              :      different builtins/internal functions have different behaviors.  */
    2198         1607 :   if (TYPE_PRECISION (unprom_diff.type)
    2199         1607 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2200          264 :     switch (ifn)
    2201              :       {
    2202           95 :       case IFN_POPCOUNT:
    2203              :         /* For popcount require zero extension, which doesn't add any
    2204              :            further bits to the count.  */
    2205           95 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2206              :           return NULL;
    2207              :         break;
    2208          109 :       case IFN_CLZ:
    2209              :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2210              :            if it is undefined at zero or if it matches also for the
    2211              :            defined value there.  */
    2212          109 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2213              :           return NULL;
    2214          109 :         if (!type_has_mode_precision_p (lhs_type)
    2215          109 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2216            0 :           return NULL;
    2217          109 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2218          109 :                   - TYPE_PRECISION (lhs_type));
    2219          109 :         if (gimple_call_internal_p (call_stmt)
    2220          109 :             && gimple_call_num_args (call_stmt) == 2)
    2221              :           {
    2222            0 :             int val1, val2;
    2223            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2224            0 :             int d2
    2225            0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2226              :                                            val2);
    2227            0 :             if (d2 != 2 || val1 != val2 + addend)
    2228              :               return NULL;
    2229              :           }
    2230              :         break;
    2231           40 :       case IFN_CTZ:
    2232              :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2233              :            if it is undefined at zero or if it matches also for the
    2234              :            defined value there.  */
    2235           40 :         if (gimple_call_internal_p (call_stmt)
    2236           40 :             && gimple_call_num_args (call_stmt) == 2)
    2237              :           {
    2238            0 :             int val1, val2;
    2239            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2240            0 :             int d2
    2241            0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2242              :                                            val2);
    2243            0 :             if (d2 != 2 || val1 != val2)
    2244              :               return NULL;
    2245              :           }
    2246              :         break;
    2247              :       case IFN_FFS:
    2248              :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2249              :         break;
    2250            0 :       default:
    2251            0 :         gcc_unreachable ();
    2252              :       }
    2253              : 
    2254         1607 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2255              :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2256         1607 :   if (!vec_type)
    2257              :     return NULL;
    2258              : 
    2259         1482 :   bool supported
    2260         1482 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2261         1482 :   if (!supported)
    2262         1305 :     switch (ifn)
    2263              :       {
    2264              :       case IFN_POPCOUNT:
    2265              :       case IFN_CLZ:
    2266              :         return NULL;
    2267           57 :       case IFN_FFS:
    2268              :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2269           57 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2270              :                                             OPTIMIZE_FOR_SPEED))
    2271              :           break;
    2272              :         /* FALLTHRU */
    2273          515 :       case IFN_CTZ:
    2274              :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2275              :            clz or popcount.  */
    2276          515 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2277              :                                             OPTIMIZE_FOR_SPEED))
    2278              :           break;
    2279          455 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2280              :                                             OPTIMIZE_FOR_SPEED))
    2281              :           break;
    2282              :         return NULL;
    2283            0 :       default:
    2284            0 :         gcc_unreachable ();
    2285              :       }
    2286              : 
    2287          318 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2288              :                          call_stmt);  /* NOTE(review): dump name says "vec_recog", not "vect_recog"; confirm this is intended.  */
    2289              : 
    2290              :   /* Create B = .POPCOUNT (A), or the .CLZ, .CTZ or .FFS equivalent.  */
    2291          318 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2292          318 :   tree arg2 = NULL_TREE;
    2293          318 :   int val;
    2294          318 :   if (ifn == IFN_CLZ
    2295          368 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2296              :                                     val) == 2)
    2297           48 :     arg2 = build_int_cst (integer_type_node, val);
    2298          270 :   else if (ifn == IFN_CTZ
    2299          363 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2300              :                                          val) == 2)
    2301           93 :     arg2 = build_int_cst (integer_type_node, val);
    2302          318 :   if (arg2)
    2303          141 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2304              :   else
    2305          177 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2306          318 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2307          318 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2308          318 :   *type_out = vec_type;
    2309              : 
    2310          318 :   if (dump_enabled_p ())
    2311           24 :     dump_printf_loc (MSG_NOTE, vect_location,
    2312              :                      "created pattern stmt: %G", pattern_stmt);
    2313              : 
    2314          318 :   if (addend)  /* Only set in the IFN_CLZ case above.  */
    2315              :     {
    2316           12 :       gcc_assert (supported);
    2317           12 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2318           12 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2319           12 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2320           12 :                                           build_int_cst (lhs_type, addend));
    2321              :     }
    2322          306 :   else if (!supported)  /* IFN_CTZ or IFN_FFS; let vect_recog_ctz_ffs_pattern lower it.  */
    2323              :     {
    2324          141 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2325          141 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2326          141 :       pattern_stmt
    2327          141 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2328          141 :       if (pattern_stmt == NULL)
    2329              :         return NULL;
    2330          141 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2331              :         {
    2332          141 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2333          141 :           gimple_seq_add_seq_without_update (pseq, seq);
    2334              :         }
    2335              :     }
    2336              :   return pattern_stmt;
    2337              : }
    2338              : 
    2339              : /* Function vect_recog_pow_pattern
    2340              : 
    2341              :    Try to find the following pattern:
    2342              : 
    2343              :      x = POW (y, N);
    2344              : 
    2345              :    with POW being one of pow, powf, powi, powif and N being
    2346              :    either 2 or 0.5.
    2347              : 
    2348              :    Input:
    2349              : 
    2350              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2351              : 
    2352              :    Output:
    2353              : 
    2354              :    * TYPE_OUT: The type of the output of this pattern.
    2355              : 
    2356              :    * Return value: A new stmt that will be used to replace the sequence of
    2357              :    stmts that constitute the pattern. In this case it will be:
    2358              :         x = y * y
    2359              :    or
    2360              :         x = sqrt (y)
    2361              : */
    2362              : 
    2363              : static gimple *
    2364     30850070 : vect_recog_pow_pattern (vec_info *vinfo,
    2365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2366              : {
    2367     30850070 :   gimple *last_stmt = stmt_vinfo->stmt;
    2368     30850070 :   tree base, exp;
    2369     30850070 :   gimple *stmt;
    2370     30850070 :   tree var;
    2371              : 
    2372     30850070 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2373              :     return NULL;
    2374              : 
    2375      1500550 :   switch (gimple_call_combined_fn (last_stmt))
    2376              :     {
    2377          276 :     CASE_CFN_POW:
    2378          276 :     CASE_CFN_POWI:
    2379          276 :       break;
    2380              : 
    2381              :     default:
    2382              :       return NULL;
    2383              :     }
    2384              : 
    2385          276 :   base = gimple_call_arg (last_stmt, 0);
    2386          276 :   exp = gimple_call_arg (last_stmt, 1);
    2387          276 :   if (TREE_CODE (exp) != REAL_CST
    2388          249 :       && TREE_CODE (exp) != INTEGER_CST)  /* Exponent is not a constant.  */
    2389              :     {
    2390          249 :       if (flag_unsafe_math_optimizations
    2391           37 :           && TREE_CODE (base) == REAL_CST
    2392          252 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2393              :         {
    2394            3 :           combined_fn log_cfn;
    2395            3 :           built_in_function exp_bfn;
    2396            3 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2397              :             {
    2398              :             case BUILT_IN_POW:
    2399              :               log_cfn = CFN_BUILT_IN_LOG;
    2400              :               exp_bfn = BUILT_IN_EXP;
    2401              :               break;
    2402            0 :             case BUILT_IN_POWF:
    2403            0 :               log_cfn = CFN_BUILT_IN_LOGF;
    2404            0 :               exp_bfn = BUILT_IN_EXPF;
    2405            0 :               break;
    2406            0 :             case BUILT_IN_POWL:
    2407            0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2408            0 :               exp_bfn = BUILT_IN_EXPL;
    2409            0 :               break;
    2410              :             default:
    2411              :               return NULL;
    2412              :             }
    2413            3 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2414            3 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2415              :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2416              :              does that, but if C is a power of 2, we want to use
    2417              :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2418              :              vectorization we don't have vectorized exp2.  */
    2419            3 :           if (logc
    2420            3 :               && TREE_CODE (logc) == REAL_CST
    2421            3 :               && exp_decl
    2422            6 :               && lookup_attribute ("omp declare simd",
    2423            3 :                                    DECL_ATTRIBUTES (exp_decl)))
    2424              :             {
    2425            3 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2426            3 :               if (node->simd_clones == NULL)  /* No SIMD clones yet; try to create them.  */
    2427              :                 {
    2428            2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2429            2 :                       || node->definition)
    2430              :                     return NULL;
    2431            2 :                   expand_simd_clones (node);
    2432            2 :                   if (node->simd_clones == NULL)
    2433              :                     return NULL;
    2434              :                 }
    2435            3 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2436            3 :               if (!*type_out)
    2437              :                 return NULL;
    2438            3 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2439            3 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);  /* def = exp * log (C).  */
    2440            3 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2441            3 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2442            3 :               g = gimple_build_call (exp_decl, 1, def);
    2443            3 :               gimple_call_set_lhs (g, res);
    2444            3 :               return g;
    2445              :             }
    2446              :         }
    2447              : 
    2448          246 :       return NULL;
    2449              :     }
    2450              : 
    2451              :   /* We now have a pow or powi builtin function call with a constant
    2452              :      exponent.  */
    2453              : 
    2454              :   /* Catch squaring: pow (y, 2) becomes y * y.  */
    2455           27 :   if ((tree_fits_shwi_p (exp)
    2456            0 :        && tree_to_shwi (exp) == 2)
    2457           27 :       || (TREE_CODE (exp) == REAL_CST
    2458           27 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2459              :     {
    2460            7 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2461            7 :                                             TREE_TYPE (base), type_out))
    2462              :         return NULL;
    2463              : 
    2464            7 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2465            7 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2466            7 :       return stmt;
    2467              :     }
    2468              : 
    2469              :   /* Catch square root: pow (y, 0.5) becomes .SQRT (y) if IFN_SQRT is supported for the vector type.  */
    2470           20 :   if (TREE_CODE (exp) == REAL_CST
    2471           20 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2472              :     {
    2473           10 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2474           10 :       if (*type_out
    2475           10 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2476              :                                              OPTIMIZE_FOR_SPEED))
    2477              :         {
    2478            8 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2479            8 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2480            8 :           gimple_call_set_lhs (stmt, var);
    2481            8 :           gimple_call_set_nothrow (stmt, true);
    2482            8 :           return stmt;
    2483              :         }
    2484              :     }
    2485              : 
    2486              :   return NULL;
    2487              : }
    2488              : 
    2489              : 
    2490              : /* Function vect_recog_widen_sum_pattern
    2491              : 
    2492              :    Try to find the following pattern:
    2493              : 
    2494              :      type x_t;
    2495              :      TYPE x_T, sum = init;
    2496              :    loop:
    2497              :      sum_0 = phi <init, sum_1>
    2498              :      S1  x_t = *p;
    2499              :      S2  x_T = (TYPE) x_t;
    2500              :      S3  sum_1 = x_T + sum_0;
    2501              : 
    2502              :    where type 'TYPE' is at least double the size of type 'type', i.e. we're
    2503              :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2504              :    a special case of a reduction computation.
    2505              : 
    2506              :    Input:
    2507              : 
    2508              :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2509              :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2510              : 
    2511              :    Output:
    2512              : 
    2513              :    * TYPE_OUT: The type of the output of this pattern.
    2514              : 
    2515              :    * Return value: A new stmt that will be used to replace the sequence of
    2516              :    stmts that constitute the pattern. In this case it will be:
    2517              :         WIDEN_SUM <x_t, sum_0>
    2518              : 
    2519              :    Note: The widening-sum idiom is a widening reduction pattern that is
    2520              :          vectorized without preserving all the intermediate results. It
    2521              :          produces only N/2 (widened) results (by summing up pairs of
    2522              :          intermediate results) rather than all N results.  Therefore, we
    2523              :          cannot allow this pattern when we want to get all the results and in
    2524              :          the correct order (as is the case when this computation is in an
    2525              :          inner-loop nested in an outer-loop that is being vectorized).  */
    2526              : 
    2527              : static gimple *
    2528     30850070 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2529              :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2530              : {
    2531     30850070 :   gimple *last_stmt = stmt_vinfo->stmt;
    2532     30850070 :   tree oprnd0, oprnd1;
    2533     30850070 :   tree type;
    2534     30850070 :   gimple *pattern_stmt;
    2535     30850070 :   tree var;
    2536              : 
    2537              :   /* Look for the following pattern
    2538              :           DX = (TYPE) X;
    2539              :           sum_1 = DX + sum_0;
    2540              :      In which DX is at least double the size of X, and sum_1 has been
    2541              :      recognized as a reduction variable.
    2542              :    */
    2543              : 
    2544              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2545              :      of the above pattern.  */
    2546              : 
    2547     30850070 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2548              :                                        &oprnd0, &oprnd1)
    2549        52442 :       || TREE_CODE (oprnd0) != SSA_NAME
    2550     30902243 :       || !vinfo->lookup_def (oprnd0))
    2551     30797962 :     return NULL;
    2552              : 
    2553        52108 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2554              : 
    2555              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2556              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2557              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2558              :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    2559              :      'TYPE'.  */
    2560              : 
    2561        52108 :   vect_unpromoted_value unprom0;
    2562        52108 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2563        52108 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))  /* TYPE must be at least twice as wide.  */
    2564              :     return NULL;
    2565              : 
    2566         2291 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2567              : 
    2568         2291 :   if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2569              :                                       unprom0.type, type_out))
    2570              :     return NULL;
    2571              : 
    2572            0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2573            0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);  /* var = WIDEN_SUM <x_t, sum_0>.  */
    2574              : 
    2575            0 :   return pattern_stmt;
    2576              : }
    2577              : 
    2578              : /* Function vect_recog_bitfield_ref_pattern
    2579              : 
    2580              :    Try to find the following pattern:
    2581              : 
    2582              :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2583              :    result = (type_out) bf_value;
    2584              : 
    2585              :    or
    2586              : 
    2587              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2588              : 
    2589              :    where type_out is a non-bitfield type, that is to say, its precision matches
    2590              :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2591              : 
    2592              :    Input:
    2593              : 
    2594              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2595              :    here it starts with:
    2596              :    result = (type_out) bf_value;
    2597              : 
    2598              :    or
    2599              : 
    2600              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2601              : 
    2602              :    Output:
    2603              : 
    2604              :    * TYPE_OUT: The vector type of the output of this pattern.
    2605              : 
    2606              :    * Return value: A new stmt that will be used to replace the sequence of
    2607              :    stmts that constitute the pattern. If the precision of type_out is bigger
    2608              :    than the precision of the type of container we perform the widening before the shifting,
    2609              :    since the new precision will be large enough to shift the value and moving
    2610              :    widening operations up the statement chain enables the generation of
    2611              :    widening loads.  If we are widening and the operation after the pattern is
    2612              :    an addition then we mask first and shift later, to enable the generation of
    2613              :    shifting adds.  In the case of narrowing we will always mask first, shift
    2614              :    last and then perform a narrowing operation.  This will enable the
    2615              :    generation of narrowing shifts.
    2616              : 
    2617              :    Widening with mask first, shift later:
    2618              :    container = (type_out) container;
    2619              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2620              :    result = masked >> bitpos;
    2621              : 
    2622              :    Widening with shift first, mask last:
    2623              :    container = (type_out) container;
    2624              :    shifted = container >> bitpos;
    2625              :    result = shifted & ((1 << bitsize) - 1);
    2626              : 
    2627              :    Narrowing:
    2628              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2629              :    result = masked >> bitpos;
    2630              :    result = (type_out) result;
    2631              : 
    2632              :    If the bitfield is signed and type_out is wider than the bitfield, we need to
    2633              :    keep the result sign-extended:
    2634              :    container = (type) container;
    2635              :    masked = container << (prec - bitsize - bitpos);
    2636              :    result = (type_out) (masked >> (prec - bitsize));
    2637              : 
    2638              :    Here type is the signed variant of the wider of type_out and the type
    2639              :    of container.
    2640              : 
    2641              :    The shifting is always optional depending on whether bitpos != 0.
    2642              : 
    2643              :    When the original bitfield was inside a gcond then a new gcond is also
    2644              :    generated with the new `result` as the operand to the comparison.
    2645              : 
    2646              : */
    2647              : 
    2648              : static gimple *
    2649     30792984 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2650              :                                  tree *type_out)
    2651              : {
    2652     30792984 :   gimple *bf_stmt = NULL;
    2653     30792984 :   tree lhs = NULL_TREE;
    2654     30792984 :   tree ret_type = NULL_TREE;
    2655     30792984 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2656     30792984 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2657              :     {
    2658      5144197 :       tree op = gimple_cond_lhs (cond_stmt);
    2659      5144197 :       if (TREE_CODE (op) != SSA_NAME)
    2660              :         return NULL;
    2661      5143893 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2662      5143893 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2663              :         return NULL;
    2664              :     }
    2665     25648787 :   else if (is_gimple_assign (stmt)
    2666     21069121 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2667     28474664 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2668              :     {
    2669      2784521 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2670      2784521 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2671      2784521 :       lhs = gimple_assign_lhs (stmt);
    2672      2784521 :       ret_type = TREE_TYPE (lhs);
    2673              :     }
    2674              : 
    2675      6110737 :   if (!bf_stmt
    2676      6110737 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2677              :     return NULL;
    2678              : 
    2679        15446 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2680        15446 :   tree container = TREE_OPERAND (bf_ref, 0);
    2681        15446 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);
    2682              : 
    2683        15446 :   if (!bit_field_offset (bf_ref).is_constant ()
    2684        15446 :       || !bit_field_size (bf_ref).is_constant ()
    2685        15446 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2686              :     return NULL;
    2687              : 
    2688        30514 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2689        15444 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2690        17603 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2691        13289 :     return NULL;
    2692              : 
    2693         2157 :   gimple *use_stmt, *pattern_stmt;
    2694         2157 :   use_operand_p use_p;
    2695         2157 :   bool shift_first = true;
    2696         2157 :   tree container_type = TREE_TYPE (container);
    2697         2157 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2698              : 
    2699              :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2700              :      the container may change and we have to consider offset change for
    2701              :      widening loads on big endianness.  The shift_n calculated here can be
    2702              :      independent of widening.  */
    2703         2157 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2704         2157 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2705         2157 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2706         2157 :   if (BYTES_BIG_ENDIAN)
    2707              :     shift_n = prec - shift_n - mask_width;
    2708              : 
    2709         2157 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2710         1394 :                    TYPE_PRECISION (ret_type) > mask_width);
    2711         2157 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2712         2157 :                      TYPE_PRECISION (ret_type));
    2713              : 
    2714              :   /* We move the conversion earlier if the loaded type is smaller than the
    2715              :      return type to enable the use of widening loads.  And if we need a
    2716              :      sign extension, we need to convert the loaded value early to a signed
    2717              :      type as well.  */
    2718         2157 :   if (ref_sext || load_widen)
    2719              :     {
    2720          941 :       tree type = load_widen ? ret_type : container_type;
    2721          941 :       if (ref_sext)
    2722          902 :         type = gimple_signed_type (type);
    2723          941 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2724              :                                           NOP_EXPR, container);
    2725          941 :       container = gimple_get_lhs (pattern_stmt);
    2726          941 :       container_type = TREE_TYPE (container);
    2727          941 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2728          941 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2729          941 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2730              :     }
    2731         1216 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2732              :     /* If we are doing the conversion last then also delay the shift as we may
    2733              :        be able to combine the shift and conversion in certain cases.  */
    2734              :     shift_first = false;
    2735              : 
    2736              :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2737              :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2738              :      add into a single instruction.  */
    2739         1416 :   if (lhs && !is_pattern_stmt_p (stmt_info)
    2740         3573 :       && single_imm_use (lhs, &use_p, &use_stmt))
    2741              :     {
    2742         1049 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2743         1049 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2744              :         shift_first = false;
    2745              :     }
    2746              : 
    2747              :   /* If we don't have to shift we only generate the mask, so just fix the
    2748              :      code-path to shift_first.  */
    2749         2157 :   if (shift_n == 0)
    2750          756 :     shift_first = true;
    2751              : 
    2752         2157 :   tree result;
    2753         2157 :   if (shift_first && !ref_sext)
    2754              :     {
    2755          503 :       tree shifted = container;
    2756          503 :       if (shift_n)
    2757              :         {
    2758           59 :           pattern_stmt
    2759           59 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2760              :                                    RSHIFT_EXPR, container,
    2761           59 :                                    build_int_cst (sizetype, shift_n));
    2762           59 :           shifted = gimple_assign_lhs (pattern_stmt);
    2763           59 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2764              :         }
    2765              : 
    2766          503 :       tree mask = wide_int_to_tree (container_type,
    2767          503 :                                     wi::mask (mask_width, false, prec));
    2768              : 
    2769          503 :       pattern_stmt
    2770          503 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2771              :                                BIT_AND_EXPR, shifted, mask);
    2772          503 :       result = gimple_assign_lhs (pattern_stmt);
    2773              :     }
    2774              :   else
    2775              :     {
    2776         1654 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2777         1654 :       if (!ref_sext)
    2778              :         {
    2779          752 :           tree mask = wide_int_to_tree (container_type,
    2780          752 :                                         wi::shifted_mask (shift_n,
    2781              :                                                           mask_width,
    2782              :                                                           false, prec));
    2783          752 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2784              :                                               container, mask);
    2785              :         }
    2786              :       else
    2787              :         {
    2788          902 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;
    2789          902 :           shift_n += shl;
    2790          902 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2791              :                                               container,
    2792              :                                               build_int_cst (sizetype,
    2793          902 :                                                              shl));
    2794              :         }
    2795              : 
    2796         1654 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2797         1654 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2798         1654 :       pattern_stmt
    2799         1654 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2800              :                                RSHIFT_EXPR, masked,
    2801         1654 :                                build_int_cst (sizetype, shift_n));
    2802         1654 :       result = gimple_assign_lhs (pattern_stmt);
    2803              :     }
    2804              : 
    2805         2157 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2806              :     {
    2807         1438 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2808         1438 :       pattern_stmt
    2809         1438 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2810              :                                NOP_EXPR, result);
    2811              :     }
    2812              : 
    2813         2157 :   if (!lhs)
    2814              :     {
    2815          741 :       if (!vectype)
    2816              :         return NULL;
    2817              : 
    2818          603 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2819          603 :       vectype = truth_type_for (vectype);
    2820              : 
    2821              :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2822              :                 same way as vect_recog_gcond_pattern does.  However because
    2823              :                 patterns cannot match the same root twice,  when we handle and
    2824              :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2825              :                 apply anymore.  We should really fix it so that we don't need to
    2826              :                 duplicate transformations like these.  */
    2827          603 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2828          603 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2829          603 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2830          603 :       gimple *new_stmt
    2831          603 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2832              :                                gimple_get_lhs (pattern_stmt),
    2833              :                                fold_convert (container_type, cond_cst));
    2834          603 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2835          603 :       pattern_stmt
    2836          603 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2837          603 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2838              :                              NULL_TREE, NULL_TREE);
    2839              :     }
    2840              : 
    2841         2019 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2842         2019 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2843              : 
    2844         2019 :   return pattern_stmt;
    2845              : }
    2846              : 
    2847              : /* Function vect_recog_bit_insert_pattern
    2848              : 
    2849              :    Try to find the following pattern:
    2850              : 
    2851              :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2852              : 
    2853              :    Input:
    2854              : 
    2855              :    * STMT_INFO: The stmt we want to replace.
    2856              : 
    2857              :    Output:
    2858              : 
    2859              :    * TYPE_OUT: The vector type of the output of this pattern.
    2860              : 
    2861              :    * Return value: A new stmt that will be used to replace the sequence of
    2862              :    stmts that constitute the pattern. In this case it will be:
    2863              :    value = (container_type) value;          // Convert to the container type.
    2864              :    shifted = value << bitpos;               // Shift value into place.
    2865              :    masked = shifted & (mask << bitpos);     // Mask off the non-relevant bits in
    2866              :                                             // the 'to-write value'.
    2867              :    cleared = container & ~(mask << bitpos); // Clear the bits we are about to
    2868              :                                             // write in the value we want to
    2869              :                                             // write to.
    2870              :    written = cleared | masked;              // Write bits.
    2871              : 
    2872              : 
    2873              :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2874              :    bits corresponding to the real size of the bitfield value we are writing to.
    2875              :    The shift is only emitted when the (endian-adjusted) bitpos != 0.
    2876              : 
    2877              : */
    2878              : 
    2879              : static gimple *
    2880     30795994 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2881              :                                tree *type_out)
    2882              : { /* Lowers a BIT_INSERT_EXPR assignment to shift/and/or statements; returns the final statement, or NULL if STMT_INFO does not match.  */
    2883     30795994 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2884     28136487 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2885              :     return NULL;  /* Not an assignment, or not a BIT_INSERT_EXPR.  */
    2886              :   /* Operands of BIT_INSERT_EXPR: the container, the value to insert, and the bit position.  */
    2887          597 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2888          597 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2889          597 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2890              :   /* BF_TYPE's precision is used below as the width of the inserted field.  */
    2891          597 :   tree bf_type = TREE_TYPE (value);
    2892          597 :   tree container_type = TREE_TYPE (container);
    2893              :   /* Give up unless the container is integral with a TYPE_SIZE that fits an unsigned HOST_WIDE_INT.  */
    2894          597 :   if (!INTEGRAL_TYPE_P (container_type)
    2895          597 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2896              :     return NULL;
    2897              : 
    2898          500 :   gimple *pattern_stmt;
    2899              :   /* Ask vect_convert_input for VALUE in CONTAINER_TYPE (helper defined outside this excerpt; presumably it emits any needed conversion).  */
    2900          500 :   vect_unpromoted_value unprom;
    2901          500 :   unprom.set_op (value, vect_internal_def);
    2902          500 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2903              :                               get_vectype_for_scalar_type (vinfo,
    2904              :                                                            container_type));
    2905              :   /* MASK_WIDTH: field width; PREC: TYPE_SIZE of the container; SHIFT_N: bit position of the field.  */
    2906          500 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
    2907          500 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2908          500 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2909          500 :   if (BYTES_BIG_ENDIAN)  /* Mirror the bit position within the container.  */
    2910              :     {
    2911              :       shift_n = prec - shift_n - mask_width;
    2912              :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2913              :     }
    2914              :   /* Explicit NOP_EXPR conversion if VALUE's type still differs from CONTAINER_TYPE.  */
    2915          500 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2916              :     {
    2917            0 :       pattern_stmt =
    2918            0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2919              :                              NOP_EXPR, value);
    2920            0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2921            0 :       value = gimple_get_lhs (pattern_stmt);
    2922              :     }
    2923              : 
    2924              :   /* Shift VALUE into place.  */
    2925          500 :   tree shifted = value;
    2926          500 :   if (shift_n)
    2927              :     {
    2928          249 :       gimple_seq stmts = NULL;
    2929          249 :       shifted
    2930          249 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2931          249 :       if (!gimple_seq_empty_p (stmts))  /* No statement to record if gimple_build emitted none (presumably folded).  */
    2932          112 :         append_pattern_def_seq (vinfo, stmt_info,
    2933              :                                 gimple_seq_first_stmt (stmts));
    2934              :     }
    2935              :   /* Field mask: MASK_WIDTH bits starting at bit SHIFT_N, in PREC-bit precision.  */
    2936          500 :   tree mask_t
    2937          500 :     = wide_int_to_tree (container_type,
    2938          500 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2939              : 
    2940              :   /* Clear bits we don't want to write back from SHIFTED.  */
    2941          500 :   gimple_seq stmts = NULL;
    2942          500 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2943              :                               mask_t);
    2944          500 :   if (!gimple_seq_empty_p (stmts))
    2945              :     {
    2946          110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2947          110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2948              :     }
    2949              : 
    2950              :   /* Mask off the bits in the container that we are to write to.  */
    2951          500 :   mask_t = wide_int_to_tree (container_type,
    2952          500 :                              wi::shifted_mask (shift_n, mask_width, true, prec));  /* TRUE requests the inverted mask.  */
    2953          500 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2954          500 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2955          500 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2956              : 
    2957              :   /* Write MASKED into CLEARED.  */
    2958          500 :   pattern_stmt
    2959          500 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2960              :                            BIT_IOR_EXPR, cleared, masked);
    2961              :   /* The BIT_IOR_EXPR statement is returned to the caller instead of being appended to the pattern def sequence.  */
    2962          500 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2963          500 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2964              : 
    2965          500 :   return pattern_stmt;
    2966              : }
    2967              : 
    2968              : 
    2969              : /* Recognize cases in which an operation is performed in one type WTYPE
    2970              :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2971              :    if we have:
    2972              : 
    2973              :      ATYPE a;  // narrower than NTYPE
    2974              :      BTYPE b;  // narrower than NTYPE
    2975              :      WTYPE aw = (WTYPE) a;
    2976              :      WTYPE bw = (WTYPE) b;
    2977              :      WTYPE res = aw + bw;  // only uses of aw and bw
    2978              : 
    2979              :    then it would be more efficient to do:
    2980              : 
    2981              :      NTYPE an = (NTYPE) a;
    2982              :      NTYPE bn = (NTYPE) b;
    2983              :      NTYPE resn = an + bn;
    2984              :      WTYPE res = (WTYPE) resn;
    2985              : 
    2986              :    Other situations include things like:
    2987              : 
    2988              :      ATYPE a;  // NTYPE or narrower
    2989              :      WTYPE aw = (WTYPE) a;
    2990              :      WTYPE res = aw + b;
    2991              : 
    2992              :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2993              :    to truncate "b" and do the operation on NTYPE instead:
    2994              : 
    2995              :      NTYPE an = (NTYPE) a;
    2996              :      NTYPE bn = (NTYPE) b;  // truncation
    2997              :      NTYPE resn = an + bn;
    2998              :      WTYPE res = (WTYPE) resn;
    2999              : 
    3000              :    All users of "res" should then use "resn" instead, making the final
    3001              :    statement dead (not marked as relevant).  The final statement is still
    3002              :    needed to maintain the type correctness of the IR.
    3003              : 
    3004              :    vect_determine_precisions has already determined the minimum
    3005              :    precision of the operation and the minimum precision required
    3006              :    by users of the result.  */
    3007              : 
    3008              : static gimple *
    3009     30796434 : vect_recog_over_widening_pattern (vec_info *vinfo,
    3010              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3011              : {
    3012     30796434 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3013     21073174 :   if (!last_stmt)
    3014              :     return NULL;
    3015              : 
    3016              :   /* See whether we have found that this operation can be done on a
    3017              :      narrower type without changing its semantics.  */
    3018     21073174 :   unsigned int new_precision = last_stmt_info->operation_precision;
    3019     21073174 :   if (!new_precision)
    3020              :     return NULL;
    3021              : 
    3022      1634307 :   tree lhs = gimple_assign_lhs (last_stmt);
    3023      1634307 :   tree type = TREE_TYPE (lhs);
    3024      1634307 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3025              : 
    3026              :   /* Punt for reductions where we don't handle the type conversions.  */
    3027      1634307 :   if (vect_is_reduction (last_stmt_info))
    3028              :     return NULL;
    3029              : 
    3030              :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    3031              :      operands are interesting.  */
    3032      1625961 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    3033              : 
    3034              :   /* Check the operands.  */
    3035      1625961 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    3036      1625961 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    3037      1625961 :   unprom.quick_grow_cleared (nops);
    3038      1625961 :   unsigned int min_precision = 0;
    3039      1625961 :   bool single_use_p = false;
    3040      4858499 :   for (unsigned int i = 0; i < nops; ++i)
    3041              :     {
    3042      3234096 :       tree op = gimple_op (last_stmt, first_op + i);
    3043      3234096 :       if (TREE_CODE (op) == INTEGER_CST)
    3044      1475193 :         unprom[i].set_op (op, vect_constant_def);
    3045      1758903 :       else if (TREE_CODE (op) == SSA_NAME)
    3046              :         {
    3047      1758903 :           bool op_single_use_p = true;
    3048      1758903 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3049              :                                                      &op_single_use_p))
    3050         1558 :             return NULL;
    3051              :           /* If:
    3052              : 
    3053              :              (1) N bits of the result are needed;
    3054              :              (2) all inputs are widened from M<N bits; and
    3055              :              (3) one operand OP is a single-use SSA name
    3056              : 
    3057              :              we can shift the M->N widening from OP to the output
    3058              :              without changing the number or type of extensions involved.
    3059              :              This then reduces the number of copies of STMT_INFO.
    3060              : 
    3061              :              If instead of (3) more than one operand is a single-use SSA name,
    3062              :              shifting the extension to the output is even more of a win.
    3063              : 
    3064              :              If instead:
    3065              : 
    3066              :              (1) N bits of the result are needed;
    3067              :              (2) one operand OP2 is widened from M2<N bits;
    3068              :              (3) another operand OP1 is widened from M1<M2 bits; and
    3069              :              (4) both OP1 and OP2 are single-use
    3070              : 
    3071              :              the choice is between:
    3072              : 
    3073              :              (a) truncating OP2 to M1, doing the operation on M1,
    3074              :                  and then widening the result to N
    3075              : 
    3076              :              (b) widening OP1 to M2, doing the operation on M2, and then
    3077              :                  widening the result to N
    3078              : 
    3079              :              Both shift the M2->N widening of the inputs to the output.
    3080              :              (a) additionally shifts the M1->M2 widening to the output;
    3081              :              it requires fewer copies of STMT_INFO but requires an extra
    3082              :              M2->M1 truncation.
    3083              : 
    3084              :              Which is better will depend on the complexity and cost of
    3085              :              STMT_INFO, which is hard to predict at this stage.  However,
    3086              :              a clear tie-breaker in favor of (b) is the fact that the
    3087              :              truncation in (a) increases the length of the operation chain.
    3088              : 
    3089              :              If instead of (4) only one of OP1 or OP2 is single-use,
    3090              :              (b) is still a win over doing the operation in N bits:
    3091              :              it still shifts the M2->N widening on the single-use operand
    3092              :              to the output and reduces the number of STMT_INFO copies.
    3093              : 
    3094              :              If neither operand is single-use then operating on fewer than
    3095              :              N bits might lead to more extensions overall.  Whether it does
    3096              :              or not depends on global information about the vectorization
    3097              :              region, and whether that's a good trade-off would again
    3098              :              depend on the complexity and cost of the statements involved,
    3099              :              as well as things like register pressure that are not normally
    3100              :              modelled at this stage.  We therefore ignore these cases
    3101              :              and just optimize the clear single-use wins above.
    3102              : 
    3103              :              Thus we take the maximum precision of the unpromoted operands
    3104              :              and record whether any operand is single-use.  */
    3105      1757345 :           if (unprom[i].dt == vect_internal_def)
    3106              :             {
    3107      1017054 :               min_precision = MAX (min_precision,
    3108              :                                    TYPE_PRECISION (unprom[i].type));
    3109      1017054 :               single_use_p |= op_single_use_p;
    3110              :             }
    3111              :         }
    3112              :       else
    3113              :         return NULL;
    3114              :     }
    3115              : 
    3116              :   /* Although the operation could be done in operation_precision, we have
    3117              :      to balance that against introducing extra truncations or extensions.
    3118              :      Calculate the minimum precision that can be handled efficiently.
    3119              : 
    3120              :      The loop above determined that the operation could be handled
    3121              :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3122              :      extension from the inputs to the output without introducing more
    3123              :      instructions, and would reduce the number of instructions required
    3124              :      for STMT_INFO itself.
    3125              : 
    3126              :      vect_determine_precisions has also determined that the result only
    3127              :      needs min_output_precision bits.  Truncating by a factor of N times
    3128              :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3129              :      than min_output_precision, doing the operation in TYPE and truncating
    3130              :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3131              :      In contrast:
    3132              : 
    3133              :      - truncating the input to a unary operation and doing the operation
    3134              :        in the new type requires at most N - 1 + 1 = N instructions per
    3135              :        output vector
    3136              : 
    3137              :      - doing the same for a binary operation requires at most
    3138              :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3139              : 
    3140              :      Both unary and binary operations require fewer instructions than
    3141              :      this if the operands were extended from a suitable truncated form.
    3142              :      Thus there is usually nothing to lose by doing operations in
    3143              :      min_output_precision bits, but there can be something to gain.  */
    3144      1624403 :   if (!single_use_p)
    3145      1283505 :     min_precision = last_stmt_info->min_output_precision;
    3146              :   else
    3147       340898 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3148              : 
    3149              :   /* Apply the minimum efficient precision we just calculated.  */
    3150      1624403 :   if (new_precision < min_precision)
    3151              :     new_precision = min_precision;
    3152      1624403 :   new_precision = vect_element_precision (new_precision);
    3153      1624403 :   if (new_precision >= TYPE_PRECISION (type))
    3154              :     return NULL;
    3155              : 
    3156       151334 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3157              : 
    3158       151334 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3159       151334 :   if (!*type_out)
    3160              :     return NULL;
    3161              : 
    3162              :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3163       134268 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3164       134268 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3165              : 
    3166              :   /* If we're truncating an operation, we need to make sure that we
    3167              :      don't introduce new undefined overflow.  The codes tested here are
    3168              :      a subset of those accepted by vect_truncatable_operation_p.  */
    3169       134268 :   tree op_type = new_type;
    3170       134268 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3171       174519 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3172        27861 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3173              : 
    3174       134268 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3175       134268 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3176       134268 :   if (!new_vectype || !op_vectype)
    3177              :     return NULL;
    3178              : 
    3179              :   /* Verify we can handle the new operation.  For shifts and rotates
    3180              :      apply a heuristic of whether we are likely facing a vector-vector
    3181              :      or a vector-scalar operation.  Since a later pattern might still
    3182              :      rewrite an unsupported operation into a supported one, err on the
    3183              :      side of accepting the narrowed operation when the original operation
    3184              :      was not supported either, or when this is a binary operation and
    3185              :      the 2nd operand is constant.  */
    3186       134268 :   if (code == RSHIFT_EXPR || code == LSHIFT_EXPR || code == RROTATE_EXPR)
    3187              :     {
    3188        27892 :       if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3189        26839 :           && ((unprom[1].dt != vect_external_def
    3190        26427 :                && unprom[1].dt != vect_constant_def)
    3191        18112 :               || !target_has_vecop_for_code (code, op_vectype, optab_scalar))
    3192        36648 :           && !(!target_has_vecop_for_code (code, *type_out, optab_vector)
    3193         7770 :                && ((unprom[1].dt != vect_external_def
    3194         7770 :                     || unprom[1].dt != vect_constant_def)
    3195              :                    || !target_has_vecop_for_code (code, *type_out,
    3196              :                                                   optab_scalar))))
    3197              :         return NULL;
    3198              :     }
    3199       106376 :   else if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3200       106376 :            && (target_has_vecop_for_code (code, *type_out, optab_vector)
    3201           27 :                && !(nops == 2 && unprom[1].dt == vect_constant_def)))
    3202              :     return NULL;
    3203              : 
    3204       133273 :   if (dump_enabled_p ())
    3205         4327 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3206              :                      type, new_type);
    3207              : 
    3208              :   /* Calculate the rhs operands for an operation on OP_TYPE.  */
    3209       133273 :   tree ops[3] = {};
    3210       133501 :   for (unsigned int i = 1; i < first_op; ++i)
    3211          228 :     ops[i - 1] = gimple_op (last_stmt, i);
    3212       133273 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3213       133273 :                        op_type, &unprom[0], op_vectype);
    3214              : 
    3215              :   /* Use the operation to produce a result of type OP_TYPE.  */
    3216       133273 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3217       133273 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3218              :                                               ops[0], ops[1], ops[2]);
    3219       133273 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3220              : 
    3221       133273 :   if (dump_enabled_p ())
    3222         4327 :     dump_printf_loc (MSG_NOTE, vect_location,
    3223              :                      "created pattern stmt: %G", pattern_stmt);
    3224              : 
    3225              :   /* Convert back to the original signedness, if OP_TYPE is different
    3226              :      from NEW_TYPE.  */
    3227       133273 :   if (op_type != new_type)
    3228        27855 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3229              :                                         pattern_stmt, op_vectype);
    3230              : 
    3231              :   /* Promote the result to the original type.  */
    3232       133273 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3233              :                                       pattern_stmt, new_vectype);
    3234              : 
    3235       133273 :   return pattern_stmt;
    3236      1625961 : }
    3237              : 
    3238              : /* Recognize the following patterns:
    3239              : 
    3240              :      ATYPE a;  // narrower than TYPE
    3241              :      BTYPE b;  // narrower than TYPE
    3242              : 
    3243              :    1) Multiply high with scaling
    3244              :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3245              :      Here, c is bitsize (TYPE) / 2 - 1.
    3246              : 
    3247              :    2) ... or also with rounding
    3248              :      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
    3249              :      Here, d is bitsize (TYPE) / 2 - 2.
    3250              : 
    3251              :    3) Normal multiply high
    3252              :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3253              :      Here, e is bitsize (TYPE) / 2.
    3254              : 
    3255              :    where only the bottom half of res is used.
                      : 
                      :    On a match, build a call to IFN_MULHS (pattern 1), IFN_MULHRS
                      :    (pattern 2) or IFN_MULH (pattern 3) on the two inputs prepared by
                      :    vect_convert_inputs for the narrow type, set *TYPE_OUT to the
                      :    vector type for TYPE and return the statement produced by
                      :    vect_convert_output for TYPE.  Return NULL if the pattern is not
                      :    matched, if a required vector type is unavailable or if the
                      :    target does not support the chosen internal function.  */
    3256              : 
    3257              : static gimple *
    3258     30922585 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3259              :                           stmt_vec_info last_stmt_info, tree *type_out)
    3260              : {
    3261              :   /* Check for a right shift.  */
    3262     30922585 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3263     21199167 :   if (!last_stmt
    3264     21199167 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3265              :     return NULL;
    3266              : 
    3267              :   /* Check that the shift result is wider than the users of the
    3268              :      result need (i.e. that narrowing would be a natural choice).  */
    3269       359881 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3270       359881 :   unsigned int target_precision
    3271       359881 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3272       359881 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3273       359881 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3274              :     return NULL;
    3275              : 
    3276              :   /* Look through any change in sign on the outer shift input.  The
                      :      value found must still have the precision of LHS_TYPE.  */
    3277        49663 :   vect_unpromoted_value unprom_rshift_input;
    3278        49663 :   tree rshift_input = vect_look_through_possible_promotion
    3279        49663 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3280        49663 :   if (!rshift_input
    3281        49663 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3282        49065 :            != TYPE_PRECISION (lhs_type))
    3283              :     return NULL;
    3284              : 
    3285              :   /* Get the definition of the shift input, which must be an
                      :      assignment.  */
    3286        45836 :   stmt_vec_info rshift_input_stmt_info
    3287        45836 :     = vect_get_internal_def (vinfo, rshift_input);
    3288        45836 :   if (!rshift_input_stmt_info)
    3289              :     return NULL;
    3290        41213 :   gassign *rshift_input_stmt
    3291     30960570 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3292        38129 :   if (!rshift_input_stmt)
    3293              :     return NULL;
    3294              : 
    3295        38129 :   stmt_vec_info mulh_stmt_info;
    3296        38129 :   tree scale_term;
    3297        38129 :   bool rounding_p = false;  /* Set once the rounding form 2) is seen.  */
    3298              : 
    3299              :   /* Check for the presence of the rounding term.  */
    3300        45286 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3301              :     {
    3302              :       /* Check that the outer shift was by 1.  */
    3303        18994 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3304         9430 :         return NULL;
    3305              : 
    3306              :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3307         1304 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3308              :         return NULL;
    3309              : 
    3310              :       /* Look through any change in sign on the addition input.  */
    3311          110 :       vect_unpromoted_value unprom_plus_input;
    3312          110 :       tree plus_input = vect_look_through_possible_promotion
    3313          110 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3314          110 :       if (!plus_input
    3315          110 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3316          110 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3317              :         return NULL;
    3318              : 
    3319              :       /* Get the definition of the multiply-high-scale part, which must
                      :          itself be a right shift.  */
    3320          110 :       stmt_vec_info plus_input_stmt_info
    3321          110 :         = vect_get_internal_def (vinfo, plus_input);
    3322          110 :       if (!plus_input_stmt_info)
    3323              :         return NULL;
    3324          110 :       gassign *plus_input_stmt
    3325         9540 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3326          110 :       if (!plus_input_stmt
    3327          110 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3328              :         return NULL;
    3329              : 
    3330              :       /* Look through any change in sign on the scaling input.  */
    3331           67 :       vect_unpromoted_value unprom_scale_input;
    3332           67 :       tree scale_input = vect_look_through_possible_promotion
    3333           67 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3334           67 :       if (!scale_input
    3335           67 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3336           67 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3337              :         return NULL;
    3338              : 
    3339              :       /* Get the definition of the multiply-high part.  */
    3340           67 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3341           67 :       if (!mulh_stmt_info)
    3342              :         return NULL;
    3343              : 
    3344              :       /* Get the scaling term, i.e. the amount of the inner shift.  */
    3345           67 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3346           67 :       rounding_p = true;
    3347              :     }
    3348              :   else
    3349              :     {
                      :       /* No rounding term: the outer shift is the scaling shift and its
                      :          input is the candidate multiplication.  */
    3350        28632 :       mulh_stmt_info = rshift_input_stmt_info;
    3351        28632 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3352              :     }
    3353              : 
    3354              :   /* Check that the scaling factor is constant.  */
    3355        28699 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3356              :     return NULL;
    3357              : 
    3358              :   /* Check whether the scaling input term can be seen as two widened
    3359              :      inputs multiplied together.  */
    3360        83238 :   vect_unpromoted_value unprom_mult[2];
    3361        27746 :   tree new_type;
    3362        27746 :   unsigned int nops
    3363        27746 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3364              :                             false, 2, unprom_mult, &new_type);
    3365        27746 :   if (nops != 2)
    3366              :     return NULL;
    3367              : 
    3368              :   /* Adjust output precision: do not go narrower than TARGET_PRECISION.  */
    3369         1253 :   if (TYPE_PRECISION (new_type) < target_precision)
    3370            0 :     new_type = build_nonstandard_integer_type
    3371            0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3372              : 
    3373         1253 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3374         1253 :   internal_fn ifn;
    3375              :   /* Check that the scaling factor matches one of the patterns above.
    3376              :      Compare against MULT_PRECISION, the precision actually used for
    3377              :      the internal function, rather than TARGET_PRECISION.  */
    3378         1253 :   if (rounding_p)
    3379              :     {
    3380              :       /* Check pattern 2).  */
    3381          134 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3382          201 :           != TYPE_PRECISION (lhs_type))
    3383              :         return NULL;
    3384              : 
    3385              :       ifn = IFN_MULHRS;
    3386              :     }
    3387              :   else
    3388              :     {
    3389              :       /* Check for pattern 1).  */
    3390         2372 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3391         3558 :           == TYPE_PRECISION (lhs_type))
    3392              :         ifn = IFN_MULHS;
    3393              :       /* Check for pattern 3).  */
    3394         1152 :       else if (wi::to_widest (scale_term) + mult_precision
    3395         2304 :                == TYPE_PRECISION (lhs_type))
    3396              :         ifn = IFN_MULH;
    3397              :       else
    3398              :         return NULL;
    3399              :     }
    3400              : 
    3401         1190 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3402              : 
    3403              :   /* Check for target support.  */
    3404         1190 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3405         1190 :   if (!new_vectype
    3406         2351 :       || !direct_internal_fn_supported_p
    3407         1161 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3408         1046 :     return NULL;
    3409              : 
    3410              :   /* The IR requires a valid vector type for the cast result, even though
    3411              :      it's likely to be discarded.  */
    3412          144 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3413          144 :   if (!*type_out)
    3414              :     return NULL;
    3415              : 
    3416              :   /* Generate the call to IFN (IFN_MULHS, IFN_MULHRS or IFN_MULH).  */
    3417          144 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3418          144 :   tree new_ops[2];
    3419          144 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3420              :                        unprom_mult, new_vectype);
    3421          144 :   gcall *mulhrs_stmt
    3422          144 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3423          144 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3424          144 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3425              : 
    3426          144 :   if (dump_enabled_p ())
    3427            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3428              :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3429              : 
                      :   /* Hand the call to vect_convert_output together with LHS_TYPE, the
                      :      type of the original shift result.  */
    3430          144 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3431          144 :                               mulhrs_stmt, new_vectype);
    3432              : }
    3433              : 
    3434              : /* Recognize the patterns:
    3435              : 
    3436              :             ATYPE a;  // narrower than TYPE
    3437              :             BTYPE b;  // narrower than TYPE
    3438              :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3439              :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3440              : 
    3441              :    where only the bottom half of avg is used.  Try to transform them into:
    3442              : 
    3443              :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3444              :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3445              : 
    3446              :   followed by:
    3447              : 
    3448              :             TYPE avg = (TYPE) avg';
    3449              : 
    3450              :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3451              :   of avg is used, all or part of the cast of avg' should become redundant.
    3452              : 
    3453              :   If the target does not support the internal function directly and NTYPE
    3454              :   is unsigned, generate code to distribute rshift over plus and add a carry
                      :   (provided the shift, plus, ior and and optabs exist for the vector type).
                      : 
                      :   On success set *TYPE_OUT to the vector type for TYPE and return the
                      :   statement produced by vect_convert_output for TYPE.  Return NULL if
                      :   the pattern is not recognized or a required vector type or operation
                      :   is unavailable.  */
    3455              : 
    3456              : static gimple *
    3457     30920951 : vect_recog_average_pattern (vec_info *vinfo,
    3458              :                             stmt_vec_info last_stmt_info, tree *type_out)
    3459              : {
    3460              :   /* Check for a shift right by one bit.  */
    3461     30920951 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3462     21197691 :   if (!last_stmt
    3463     21197691 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3464       359787 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3465     30864997 :     return NULL;
    3466              : 
    3467              :   /* Check that the shift result is wider than the users of the
    3468              :      result need (i.e. that narrowing would be a natural choice).  */
    3469        55954 :   tree lhs = gimple_assign_lhs (last_stmt);
    3470        55954 :   tree type = TREE_TYPE (lhs);
    3471        55954 :   unsigned int target_precision
    3472        55954 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3473        55954 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3474              :     return NULL;
    3475              : 
    3476              :   /* Look through any change in sign on the shift input.  The value
                      :      found must still have the precision of TYPE.  */
    3477         2218 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3478         2218 :   vect_unpromoted_value unprom_plus;
    3479         2218 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3480              :                                                      &unprom_plus);
    3481         2218 :   if (!rshift_rhs
    3482         2218 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3483              :     return NULL;
    3484              : 
    3485              :   /* Get the definition of the shift input.  */
    3486         2216 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3487         2216 :   if (!plus_stmt_info)
    3488              :     return NULL;
    3489              : 
    3490              :   /* Check whether the shift input can be seen as a tree of additions on
    3491              :      2 or 3 widened inputs.
    3492              : 
    3493              :      Note that the pattern should be a win even if the result of one or
    3494              :      more additions is reused elsewhere: if the pattern matches, we'd be
    3495              :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3496         8792 :   internal_fn ifn = IFN_AVG_FLOOR;  /* Pattern (1) unless a 1 operand is found.  */
    3497         8792 :   vect_unpromoted_value unprom[3];
    3498         2198 :   tree new_type;
    3499         2198 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3500         2198 :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3501              :                                             unprom, &new_type);
    3502         2198 :   if (nops == 0)
    3503              :     return NULL;
    3504          907 :   if (nops == 3)
    3505              :     {
    3506              :       /* Check that one operand is 1.  */
    3507              :       unsigned int i;
    3508          987 :       for (i = 0; i < 3; ++i)
    3509          933 :         if (integer_onep (unprom[i].op))
    3510              :           break;
    3511          311 :       if (i == 3)
    3512              :         return NULL;
    3513              :       /* Throw away the 1 operand and keep the other two, so that they
                      :          end up in UNPROM[0] and UNPROM[1].  */
    3514          257 :       if (i < 2)
    3515            0 :         unprom[i] = unprom[2];
    3516              :       ifn = IFN_AVG_CEIL;
    3517              :     }
    3518              : 
    3519          853 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3520              : 
    3521              :   /* We know that:
    3522              : 
    3523              :      (a) the operation can be viewed as:
    3524              : 
    3525              :            TYPE widened0 = (TYPE) UNPROM[0];
    3526              :            TYPE widened1 = (TYPE) UNPROM[1];
    3527              :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3528              :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3529              : 
    3530              :      (b) the first two statements are equivalent to:
    3531              : 
    3532              :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3533              :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3534              : 
    3535              :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3536              :          where sensible;
    3537              : 
    3538              :      (d) all the operations can be performed correctly at twice the width of
    3539              :          NEW_TYPE, due to the nature of the average operation; and
    3540              : 
    3541              :      (e) users of the result of the right shift need only TARGET_PRECISION
    3542              :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3543              :          precision.
    3544              : 
    3545              :      Under these circumstances, the only situation in which NEW_TYPE
    3546              :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3547              :      and an addition result are all used more than once.  Thus we can
    3548              :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3549              :      as "free", whereas widening the result of the average instruction
    3550              :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3551              :      therefore better not to go narrower than TARGET_PRECISION.  */
    3552          853 :   if (TYPE_PRECISION (new_type) < target_precision)
    3553            0 :     new_type = build_nonstandard_integer_type (target_precision,
    3554            0 :                                                TYPE_UNSIGNED (new_type));
    3555              : 
    3556              :   /* Check for target support.  */
    3557          853 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3558          853 :   if (!new_vectype)
    3559              :     return NULL;
    3560              : 
    3561          853 :   bool fallback_p = false;  /* True if the open-coded sequence is needed.  */
    3562              : 
    3563          853 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3564              :     ;  /* Directly supported: the internal function call is built below.  */
    3565          695 :   else if (TYPE_UNSIGNED (new_type)
    3566          256 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3567          256 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3568          256 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3569          951 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3570              :     fallback_p = true;
    3571              :   else
    3572          439 :     return NULL;
    3573              : 
    3574              :   /* The IR requires a valid vector type for the cast result, even though
    3575              :      it's likely to be discarded.  */
    3576          414 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3577          414 :   if (!*type_out)
    3578              :     return NULL;
    3579              : 
    3580          410 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3581          410 :   tree new_ops[2];
    3582          410 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3583              :                        unprom, new_vectype);
    3584              : 
    3585          410 :   if (fallback_p)
    3586              :     {
    3587              :       /* As a fallback, generate code for following sequence:
    3588              : 
    3589              :          shifted_op0 = new_ops[0] >> 1;
    3590              :          shifted_op1 = new_ops[1] >> 1;
    3591              :          sum_of_shifted = shifted_op0 + shifted_op1;
    3592              :          unmasked_carry = new_ops[0] and/or new_ops[1];
                      :                           (IOR for IFN_AVG_CEIL, AND for IFN_AVG_FLOOR)
    3593              :          carry = unmasked_carry & 1;
    3594              :          new_var = sum_of_shifted + carry;
                      : 
                      :          All statements except the last are appended to the pattern
                      :          definition sequence of LAST_STMT_INFO; the last one is passed
                      :          to vect_convert_output.
    3595              :       */
    3596              : 
    3597          252 :       tree one_cst = build_one_cst (new_type);
    3598          252 :       gassign *g;
    3599              : 
    3600          252 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3601          252 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3602          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3603              : 
    3604          252 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3605          252 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3606          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3607              : 
    3608          252 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3609          252 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3610              :                                shifted_op0, shifted_op1);
    3611          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3612              : 
    3613          252 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3614          252 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3615          252 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3616          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3617              : 
    3618          252 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3619          252 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3620          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3621              : 
    3622          252 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3623          252 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3624              :     }
    3625              : 
    3626              :   /* Generate the IFN_AVG* call.  */
    3627          158 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3628              :                                                     new_ops[1]);
    3629          158 :   gimple_call_set_lhs (average_stmt, new_var);
    3630          158 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3631              : 
    3632          158 :   if (dump_enabled_p ())
    3633           31 :     dump_printf_loc (MSG_NOTE, vect_location,
    3634              :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3635              : 
    3636          158 :   return vect_convert_output (vinfo, last_stmt_info,
    3637          158 :                               type, average_stmt, new_vectype);
    3638              : }
    3639              : 
    3640              : /* Recognize cases in which the input to a cast is wider than its
    3641              :    output, and the input is fed by a widening operation.  Fold this
    3642              :    by removing the unnecessary intermediate widening.  E.g.:
    3643              : 
    3644              :      unsigned char a;
    3645              :      unsigned int b = (unsigned int) a;
    3646              :      unsigned short c = (unsigned short) b;
    3647              : 
    3648              :    -->
    3649              : 
    3650              :      unsigned short c = (unsigned short) a;
    3651              : 
    3652              :    Although this is rare in input IR, it is an expected side-effect
    3653              :    of the over-widening pattern above.
    3654              : 
    3655              :    This is beneficial also for integer-to-float conversions, if the
    3656              :    widened integer has more bits than the float, and if the unwidened
    3657              :    input doesn't.
                      : 
                      :    On success set *TYPE_OUT to the vector type for the type of the
                      :    cast result and return a new statement that applies the same
                      :    conversion code directly to the unpromoted input.  Return NULL
                      :    if the statement is not such a cast or no vector type is found.  */
    3658              : 
    3659              : static gimple *
    3660     30922585 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3661              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3662              : {
    3663              :   /* Check for a cast, including an integer-to-float conversion.  */
    3664     52074948 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3665     21199023 :   if (!last_stmt)
    3666              :     return NULL;
    3667     21199023 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3668     21199023 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3669              :     return NULL;
    3670              : 
    3671              :   /* Make sure that the lhs is a non-boolean scalar with a natural
                      :      bitsize, i.e. that its type has a scalar mode.  */
    3672      3015827 :   tree lhs = gimple_assign_lhs (last_stmt);
    3673      3015827 :   if (!lhs)
    3674              :     return NULL;
    3675      3015827 :   tree lhs_type = TREE_TYPE (lhs);
    3676      3015827 :   scalar_mode lhs_mode;
    3677      2996072 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3678      6010163 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3679        25309 :     return NULL;
    3680              : 
    3681              :   /* Check for a narrowing operation (from a vector point of view): the
                      :      rhs must be a non-boolean integer with more bits of precision
                      :      than the bitsize of the mode of the lhs.  */
    3682      2990518 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3683      2990518 :   tree rhs_type = TREE_TYPE (rhs);
    3684      2990518 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3685      2681067 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3686      8192186 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3687              :     return NULL;
    3688              : 
    3689              :   /* Try to find an unpromoted input that is narrower than RHS_TYPE.  */
    3690       343023 :   vect_unpromoted_value unprom;
    3691       343023 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3692       343023 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3693              :     return NULL;
    3694              : 
    3695              :   /* If the bits above RHS_TYPE matter, make sure that they're the
    3696              :      same when extending from UNPROM as they are when extending from RHS.
                      :      This is only checked for a non-integral LHS_TYPE, where the
                      :      signedness of RHS_TYPE and of UNPROM must agree.  */
    3697        46790 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3698        46790 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3699              :     return NULL;
    3700              : 
    3701              :   /* We can get the same result by casting UNPROM directly, to avoid
    3702              :      the unnecessary widening and narrowing.  */
    3703        46660 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3704              : 
    3705        46660 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3706        46660 :   if (!*type_out)
    3707              :     return NULL;
    3708              : 
                      :   /* Build the replacement with the original conversion code, applied
                      :      to the unpromoted operand, and keep the original location.  */
    3709        46660 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3710        46660 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3711        46660 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3712              : 
    3713        46660 :   return pattern_stmt;
    3714              : }
    3715              : 
    3716              : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3717              :    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.
                      :
                      :    This is a thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded
                      :    unchanged and the result of vect_recog_widen_op_pattern is returned
                      :    as-is.  NOTE(review): the `true' argument presumably marks the
                      :    operation as a shift; confirm against vect_recog_widen_op_pattern.  */
    3718              : 
    3719              : static gimple *
    3720     30850614 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3721              :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3722              : {
    3723     30850614 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3724     30850614 :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3725     30850614 :                                       "vect_recog_widen_shift_pattern");
    3726              : }
    3727              : 
    3728              : /* Detect a rotate pattern that wouldn't be otherwise vectorized:
    3729              : 
    3730              :    type a_t, b_t, c_t;
    3731              : 
    3732              :    S0 a_t = b_t r<< c_t;
    3733              : 
    3734              :   Input/Output:
    3735              : 
    3736              :   * STMT_VINFO: The stmt from which the pattern search begins,
    3737              :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3738              :     with a sequence:
    3739              : 
    3740              :    S1 d_t = -c_t;
    3741              :    S2 e_t = d_t & (B - 1);
    3742              :    S3 f_t = b_t << c_t;
    3743              :    S4 g_t = b_t >> e_t;
    3744              :    S0 a_t = f_t | g_t;
    3745              : 
    3746              :     where B is element bitsize of type.
                      :     For a right rotate the shift directions in S3 and S4 are swapped.
                      :     When c_t is an INTEGER_CST, S1 and S2 are replaced by the constant
                      :     B - c_t.  The shifts are done in the unsigned variant of the type.
                      :
                      :     A call to __builtin_bswap16 whose argument is wider than 16 bits is
                      :     also handled: it is either re-issued on a 16-bit operand or treated
                      :     as a rotate left by 8.
    3747              : 
    3748              :   Output:
    3749              : 
    3750              :   * TYPE_OUT: The type of the output of this pattern.
    3751              : 
    3752              :   * Return value: A new stmt that will be used to replace the rotate
    3753              :     S0 stmt, or NULL if no replacement is made (which includes the case
                      :     where the target can implement the rotate itself and the stmt is
                      :     not a bswap16 call).  */
    3754              : 
    3755              : static gimple *
    3756     30850614 : vect_recog_rotate_pattern (vec_info *vinfo,
    3757              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3758              : {
    3759     30850614 :   gimple *last_stmt = stmt_vinfo->stmt;
    3760     30850614 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3761     30850614 :   gimple *pattern_stmt, *def_stmt;
    3762     30850614 :   enum tree_code rhs_code;
    3763     30850614 :   enum vect_def_type dt;
    3764     30850614 :   optab optab1, optab2;
    3765     30850614 :   edge ext_def = NULL;
    3766     30850614 :   bool bswap16_p = false;
    3767              : 
    3768     30850614 :   if (is_gimple_assign (last_stmt))
    3769              :     {
    3770     21127000 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3771     21127000 :       switch (rhs_code)
    3772              :         {
    3773         7245 :         case LROTATE_EXPR:
    3774         7245 :         case RROTATE_EXPR:
    3775         7245 :           break;
    3776              :         default:
    3777              :           return NULL;
    3778              :         }
    3779              : 
    3780         7245 :       lhs = gimple_assign_lhs (last_stmt);
    3781         7245 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3782         7245 :       type = TREE_TYPE (oprnd0);
    3783         7245 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3784              :     }
    3785      9723614 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3786              :     {
    3787              :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3788              :          The vectorizer has bswap support, but only if the argument isn't
    3789              :          promoted.  */
    3790          170 :       lhs = gimple_call_lhs (last_stmt);
    3791          170 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3792          170 :       type = TREE_TYPE (oprnd0);
    3793          170 :       if (!lhs
    3794          170 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3795          170 :           || TYPE_PRECISION (type) <= 16
    3796            0 :           || TREE_CODE (oprnd0) != SSA_NAME
    3797          170 :           || BITS_PER_UNIT != 8)
    3798          170 :         return NULL;
    3799              : 
    3800            0 :       stmt_vec_info def_stmt_info;
    3801            0 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3802              :         return NULL;
    3803              : 
    3804            0 :       if (dt != vect_internal_def)
    3805              :         return NULL;
    3806              : 
    3807            0 :       if (gimple_assign_cast_p (def_stmt))
    3808              :         {
    3809            0 :           def = gimple_assign_rhs1 (def_stmt);
    3810            0 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3811            0 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3812              :             oprnd0 = def;
    3813              :         }
    3814              : 
    3815            0 :       type = TREE_TYPE (lhs);
    3816            0 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3817            0 :       if (vectype == NULL_TREE)
    3818              :         return NULL;
    3819              : 
    3820            0 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3821              :         {
    3822              :           /* The encoding uses one stepped pattern for each byte in the
    3823              :              16-bit word.  */
    3824            0 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3825            0 :           for (unsigned i = 0; i < 3; ++i)
    3826            0 :             for (unsigned j = 0; j < 2; ++j)
    3827            0 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3828              : 
    3829            0 :           vec_perm_indices indices (elts, 1,
    3830            0 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3831            0 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3832            0 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3833              :             {
    3834              :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3835              :                  undo the argument promotion.  */
    3836            0 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3837              :                 {
    3838            0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3839            0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3840            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3841            0 :                   oprnd0 = def;
    3842              :                 }
    3843              : 
    3844              :               /* Pattern detected.  */
    3845            0 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3846              : 
    3847            0 :               *type_out = vectype;
    3848              : 
    3849              :               /* Pattern supported.  Create a stmt to be used to replace the
    3850              :                  pattern, with the unpromoted argument.  */
    3851            0 :               var = vect_recog_temp_ssa_var (type, NULL);
    3852            0 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3853              :                                                 1, oprnd0);
    3854            0 :               gimple_call_set_lhs (pattern_stmt, var);
    3855            0 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3856              :                                       gimple_call_fntype (last_stmt));
    3857            0 :               return pattern_stmt;
    3858              :             }
    3859            0 :         }
    3860              : 
    3861            0 :       oprnd1 = build_int_cst (integer_type_node, 8);  /* Otherwise handle the bswap as a 16-bit rotate by 8.  */
    3862            0 :       rhs_code = LROTATE_EXPR;
    3863            0 :       bswap16_p = true;
    3864              :     }
    3865              :   else
    3866              :     return NULL;
    3867              : 
    3868         7245 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3869         7125 :       || !INTEGRAL_TYPE_P (type)
    3870        14049 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3871              :     return NULL;
    3872              : 
    3873         6804 :   stmt_vec_info def_stmt_info;
    3874         6804 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3875              :     return NULL;
    3876              : 
    3877         6804 :   if (dt != vect_internal_def
    3878         6601 :       && dt != vect_constant_def
    3879           25 :       && dt != vect_external_def)
    3880              :     return NULL;
    3881              : 
    3882         6798 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3883         6798 :   if (vectype == NULL_TREE)
    3884              :     return NULL;
    3885              : 
    3886              :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3887              :      don't do anything here, except for the bswap16 form, which is
                      :      rewritten as an LROTATE_EXPR by 8.  */
    3888         6571 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3889         6571 :   if (optab1
    3890         6571 :       && can_implement_p (optab1, TYPE_MODE (vectype)))
    3891              :     {
    3892          564 :      use_rotate:
    3893          564 :       if (bswap16_p)
    3894              :         {
    3895            0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3896              :             {
    3897            0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3898            0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3899            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3900            0 :               oprnd0 = def;
    3901              :             }
    3902              : 
    3903              :           /* Pattern detected.  */
    3904            0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3905              : 
    3906            0 :           *type_out = vectype;
    3907              : 
    3908              :           /* Pattern supported.  Create a stmt to be used to replace the
    3909              :              pattern.  */
    3910            0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3911            0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3912              :                                               oprnd1);
    3913            0 :           return pattern_stmt;
    3914              :         }
    3915              :       return NULL;
    3916              :     }
    3917              : 
    3918         6547 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3919              :     {
    3920         6463 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3921         6463 :       if (optab2
    3922         6463 :           && can_implement_p (optab2, TYPE_MODE (vectype)))
    3923          540 :         goto use_rotate;
    3924              :     }
    3925              : 
    3926              :   /* We may not use a reduction operand twice.  */
    3927         6007 :   if (vect_is_reduction (stmt_vinfo))
    3928              :     return NULL;
    3929              : 
    3930         5986 :   tree utype = unsigned_type_for (type);
    3931         5986 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3932         5986 :   if (!uvectype)
    3933              :     return NULL;
    3934              : 
    3935              :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3936              :      don't do anything here either.  */
    3937         5986 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3938         5986 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3939         5986 :   if (!optab1
    3940         5986 :       || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3941          746 :       || !optab2
    3942         6732 :       || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3943              :     {
    3944         5240 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3945              :         return NULL;
    3946         5177 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3947         5177 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3948         5177 :       if (!optab1
    3949         5177 :           || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3950         3942 :           || !optab2
    3951         9119 :           || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3952         1235 :         return NULL;
    3953              :     }
    3954              : 
    3955         4688 :   *type_out = vectype;
    3956              : 
    3957         4688 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))  /* Do the shifts in unsigned UTYPE.  */
    3958              :     {
    3959           52 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3960           52 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3961           52 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3962           52 :       oprnd0 = def;
    3963              :     }
    3964              : 
    3965         4688 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3966           15 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3967              : 
    3968         4688 :   def = NULL_TREE;  /* DEF becomes the shift-count operand.  */
    3969         4688 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3970         4688 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3971              :     def = oprnd1;
    3972           28 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3973              :     {
    3974            0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3975            0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3976            0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3977            0 :              == TYPE_PRECISION (type))
    3978              :         def = rhs1;
    3979              :     }
    3980              : 
    3981         4660 :   if (def == NULL_TREE)  /* Otherwise cast OPRND1 to UTYPE.  */
    3982              :     {
    3983           28 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3984           28 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3985           28 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3986              :     }
    3987         4688 :   stype = TREE_TYPE (def);
    3988              : 
    3989         4688 :   if (TREE_CODE (def) == INTEGER_CST)  /* Constant count: DEF2 = B - DEF.  */
    3990              :     {
    3991         4590 :       if (!tree_fits_uhwi_p (def)
    3992         4590 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3993         9180 :           || integer_zerop (def))
    3994            0 :         return NULL;
    3995         4590 :       def2 = build_int_cst (stype,
    3996         4590 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3997              :     }
    3998              :   else  /* Variable count: DEF2 = -DEF & (B - 1).  */
    3999              :     {
    4000           98 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    4001              : 
    4002           98 :       if (vecstype == NULL_TREE)
    4003              :         return NULL;
    4004           98 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4005           98 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    4006           98 :       if (ext_def)
    4007              :         {
    4008           15 :           basic_block new_bb
    4009           15 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4010           15 :           gcc_assert (!new_bb);
    4011              :         }
    4012              :       else
    4013           83 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4014              : 
    4015           98 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4016           98 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    4017           98 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    4018              :                                       gimple_assign_lhs (def_stmt), mask);
    4019           98 :       if (ext_def)
    4020              :         {
    4021           15 :           basic_block new_bb
    4022           15 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4023           15 :           gcc_assert (!new_bb);
    4024              :         }
    4025              :       else
    4026           83 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4027              :     }
    4028              : 
    4029         4688 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    4030         9305 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    4031              :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    4032              :                                   oprnd0, def);
    4033         4688 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4034              : 
    4035         4688 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    4036         9305 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    4037              :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    4038              :                                   oprnd0, def2);
    4039         4688 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4040              : 
    4041              :   /* Pattern detected.  */
    4042         4688 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    4043              : 
    4044              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4045         4688 :   var = vect_recog_temp_ssa_var (utype, NULL);
    4046         4688 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    4047              : 
    4048         4688 :   if (!useless_type_conversion_p (type, utype))  /* Convert the UTYPE result back to TYPE.  */
    4049              :     {
    4050           52 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    4051           52 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    4052           52 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    4053              :     }
    4054              :   return pattern_stmt;
    4055              : }
    4056              : 
    4057              : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    4058              :    vectorized:
    4059              : 
    4060              :    type a_t;
    4061              :    TYPE b_T, res_T;
    4062              : 
    4063              :    S1 a_t = ;
    4064              :    S2 b_T = ;
    4065              :    S3 res_T = b_T op a_t;
    4066              : 
    4067              :   where type 'TYPE' is a type with different size than 'type',
    4068              :   and op is <<, >> or rotate.
    4069              : 
    4070              :   Also detect cases:
    4071              : 
    4072              :    type a_t;
    4073              :    TYPE b_T, c_T, res_T;
    4074              : 
    4075              :    S0 c_T = ;
    4076              :    S1 a_t = (type) c_T;
    4077              :    S2 b_T = ;
    4078              :    S3 res_T = b_T op a_t;
    4079              : 
    4080              :   Input/Output:
    4081              : 
    4082              :   * STMT_VINFO: The stmt from which the pattern search begins,
    4083              :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4084              :     with a shift/rotate which has the same type on both operands, in the
    4085              :     second case just b_T op c_T, in the first case with an added cast
    4086              :     from a_t to TYPE in STMT_VINFO_PATTERN_DEF_SEQ.  In the second case,
                      :     if 'type' has lower precision than c_T, c_T is first masked to the
                      :     low bits that 'type' can hold.
    4087              : 
    4088              :   Output:
    4089              : 
    4090              :   * TYPE_OUT: The type of the output of this pattern.
    4091              : 
    4092              :   * Return value: A new stmt that will be used to replace the shift/rotate
    4093              :     S3 stmt, or NULL if the pattern does not apply.  */
    4094              : 
    4095              : static gimple *
    4096     30855600 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4097              :                                         stmt_vec_info stmt_vinfo,
    4098              :                                         tree *type_out)
    4099              : {
    4100     30855600 :   gimple *last_stmt = stmt_vinfo->stmt;
    4101     30855600 :   tree oprnd0, oprnd1, lhs, var;
    4102     30855600 :   gimple *pattern_stmt;
    4103     30855600 :   enum tree_code rhs_code;
    4104              : 
    4105     30855600 :   if (!is_gimple_assign (last_stmt))
    4106              :     return NULL;
    4107              : 
    4108     21131986 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4109     21131986 :   switch (rhs_code)
    4110              :     {
    4111       506301 :     case LSHIFT_EXPR:
    4112       506301 :     case RSHIFT_EXPR:
    4113       506301 :     case LROTATE_EXPR:
    4114       506301 :     case RROTATE_EXPR:
    4115       506301 :       break;
    4116              :     default:
    4117              :       return NULL;
    4118              :     }
    4119              : 
    4120       506301 :   lhs = gimple_assign_lhs (last_stmt);
    4121       506301 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4122       506301 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4123       506301 :   if (TREE_CODE (oprnd1) != SSA_NAME
    4124       107513 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4125        48905 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4126        48473 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4127       554774 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4128        48473 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4129       457828 :     return NULL;
    4130              : 
    4131        48473 :   stmt_vec_info def_vinfo = vinfo->lookup_def (oprnd1);
    4132        48473 :   if (!def_vinfo || STMT_VINFO_DEF_TYPE (def_vinfo) == vect_external_def)
    4133              :     return NULL;
    4134              : 
    4135        45569 :   def_vinfo = vect_stmt_to_vectorize (def_vinfo);
    4136         1130 :   gcc_assert (def_vinfo);
    4137              : 
    4138        45569 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4139        45569 :   if (*type_out == NULL_TREE)
    4140              :     return NULL;
    4141              : 
    4142        33010 :   tree def = NULL_TREE;
    4143        33010 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4144        19328 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4145              :     {
    4146         5312 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4147         5312 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4148         5312 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4149         1191 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4150              :         {
    4151         1191 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4152         1191 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4153              :             def = rhs1;
    4154              :           else  /* Mask RHS1 to the precision of OPRND1's type.  */
    4155              :             {
    4156         1104 :               tree mask
    4157         1104 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4158         1104 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4159         1104 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4160         1104 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4161         1104 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4162         1104 :                                                            TREE_TYPE (rhs1));
    4163         1104 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4164              :             }
    4165              :         }
    4166              :     }
    4167              : 
    4168         1191 :   if (def == NULL_TREE)  /* No reusable c_T: cast OPRND1 to the type of OPRND0.  */
    4169              :     {
    4170        31819 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4171        31819 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4172        31819 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4173              :     }
    4174              : 
    4175              :   /* Pattern detected.  */
    4176        33010 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4177              : 
    4178              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4179        33010 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4180        33010 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4181              : 
    4182        33010 :   return pattern_stmt;
    4183              : }
    4184              : 
    4185              : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4186              :    needed by the multiplication-by-immediate synthesis algorithm described by
    4187              :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    4188              :    present.  Return true iff the target supports all the steps.
                      :
                      :    Algorithms whose first operation is neither alg_zero nor alg_m are
                      :    rejected, as are alg_unknown, alg_m, alg_zero and alg_impossible in any
                      :    later position.  Shift steps (alg_shift) are not checked against an
                      :    optab here.  */
    4189              : 
    4190              : static bool
    4191       298176 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4192              :                                  tree vectype, bool synth_shift_p)
    4193              : {
    4194       298176 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4195              :     return false;
    4196              : 
    4197       298176 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4198       298176 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4199              : 
    4200       298176 :   if (var == negate_variant
    4201       298176 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4202              :     return false;
    4203              : 
    4204              :   /* If we must synthesize shifts with additions make sure that vector
    4205              :      addition is available.  */
    4206       297584 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4207              :     return false;
    4208              : 
    4209       143674 :   for (int i = 1; i < alg->ops; i++)
    4210              :     {
    4211       107644 :       switch (alg->op[i])
    4212              :         {
    4213              :         case alg_shift:
    4214              :           break;
    4215        26536 :         case alg_add_t_m2:
    4216        26536 :         case alg_add_t2_m:
    4217        26536 :         case alg_add_factor:
    4218        26536 :           if (!supports_vplus)
    4219              :             return false;
    4220              :           break;
    4221        16609 :         case alg_sub_t_m2:
    4222        16609 :         case alg_sub_t2_m:
    4223        16609 :         case alg_sub_factor:
    4224        16609 :           if (!supports_vminus)
    4225              :             return false;
    4226              :           break;
    4227              :         case alg_unknown:
    4228              :         case alg_m:
    4229              :         case alg_zero:
    4230              :         case alg_impossible:
    4231              :           return false;
    4232            0 :         default:
    4233            0 :           gcc_unreachable ();
    4234              :         }
    4235              :     }
    4236              : 
    4237              :   return true;
    4238              : }
    4239              : 
    4240              : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
    4241              :    putting the final result in DEST.  Append all statements but the last to
    4242              :    the pattern definition sequence of STMT_INFO in VINFO, using VECTYPE.
                      :    Return the last statement.
                      :
                      :    Each addition adds the previous result to itself, so AMNT additions are
                      :    built in total.  AMNT must be at least 1: the assert below admits 0, but
                      :    then no statement is built and gcc_unreachable is reached.  */
    4243              : 
    4244              : static gimple *
    4245            0 : synth_lshift_by_additions (vec_info *vinfo,
    4246              :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4247              :                            stmt_vec_info stmt_info, tree vectype)
    4248              : {
    4249            0 :   HOST_WIDE_INT i;
    4250            0 :   tree itype = TREE_TYPE (op);
    4251            0 :   tree prev_res = op;
    4252            0 :   gcc_assert (amnt >= 0);
    4253            0 :   for (i = 0; i < amnt; i++)
    4254              :     {
    4255            0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4256              :                       : dest;
    4257            0 :       gimple *stmt
    4258            0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4259            0 :       prev_res = tmp_var;
    4260            0 :       if (i < amnt - 1)
    4261            0 :         append_pattern_def_seq (vinfo, stmt_info, stmt, vectype);
    4262              :       else
    4263            0 :         return stmt;
    4264              :     }
    4265            0 :   gcc_unreachable ();
    4266              :   return NULL;
    4267              : }
    4268              : 
    4269              : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    4270              :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    4271              :    the process if necessary.  Append the resulting assignment statements
    4272              :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    4273              :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    4274              :    left shifts using additions.
                      :
                      :    If OP2 is zero and CODE is LSHIFT_EXPR or PLUS_EXPR, no statement is
                      :    emitted and OP1 (which must then be an SSA_NAME) is returned directly.  */
    4275              : 
    4276              : static tree
    4277        43044 : apply_binop_and_append_stmt (vec_info *vinfo,
    4278              :                              tree_code code, tree op1, tree op2,
    4279              :                              stmt_vec_info stmt_vinfo, tree vectype,
    4280              :                              bool synth_shift_p)
    4281              : {
    4282        43044 :   if (integer_zerop (op2)
    4283        43044 :       && (code == LSHIFT_EXPR
    4284        37314 :           || code == PLUS_EXPR))
    4285              :     {
    4286        37314 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4287              :       return op1;
    4288              :     }
    4289              : 
    4290         5730 :   gimple *stmt;
    4291         5730 :   tree itype = TREE_TYPE (op1);
    4292         5730 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4293              : 
    4294         5730 :   if (code == LSHIFT_EXPR
    4295         5730 :       && synth_shift_p)
    4296              :     {
    4297            0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4298            0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo,
    4299              :                                         vectype);
    4300            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);  /* The helper leaves its last stmt unappended.  */
    4301            0 :       return tmp_var;
    4302              :     }
    4303              : 
    4304         5730 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4305         5730 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
    4306         5730 :   return tmp_var;
    4307              : }
    4308              : 
    4309              : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4310              :    and simple arithmetic operations to be vectorized.  Record the statements
    4311              :    produced in STMT_VINFO and return the last statement in the sequence or
    4312              :    NULL if it's not possible to synthesize such a multiplication.
    4313              :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4314              :    works on tree-ssa form.  */
    4315              : 
    4316              : static gimple *
    4317       300918 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4318              :                              stmt_vec_info stmt_vinfo)
    4319              : {
    4320       300918 :   tree itype = TREE_TYPE (op);
    4321       300918 :   machine_mode mode = TYPE_MODE (itype);
    4322       300918 :   struct algorithm alg;
    4323       300918 :   mult_variant variant;
    4324       300918 :   if (!tree_fits_shwi_p (val))
    4325              :     return NULL;
    4326              : 
    4327              :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4328              :      signed overflow where the original operation didn't.  Perform the
    4329              :      operations on an unsigned type and cast back to avoid this.
    4330              :      In the future we may want to relax this for synthesis algorithms
    4331              :      that we can prove do not cause unexpected overflow.  */
    4332       298191 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4333              : 
    4334        59197 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4335       298191 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4336       298191 :   if (!vectype)
    4337              :     return NULL;
    4338              : 
    4339              :   /* Targets that don't support vector shifts but support vector additions
    4340              :      can synthesize shifts that way.  */
    4341       298191 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4342              : 
    4343       298191 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4344              :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4345              :      The vectorizer's benefit analysis will decide whether it's beneficial
    4346              :      to do this.  */
    4347       596382 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4348       298191 :                                        ? TYPE_MODE (vectype) : mode,
    4349              :                                        hwval, &alg, &variant, MAX_COST);
    4350       298191 :   if (!possible)
    4351              :     return NULL;
    4352              : 
    4353       298191 :   if (vect_is_reduction (stmt_vinfo))
    4354              :     {
    4355           26 :       int op_uses = alg.op[0] != alg_zero;
    4356           45 :       for (int i = 1; i < alg.ops; i++)
    4357           32 :         switch (alg.op[i])
    4358              :           {
    4359            4 :           case alg_add_t_m2:
    4360            4 :           case alg_sub_t_m2:
    4361            4 :             if (synth_shift_p && alg.log[i])
    4362              :               return NULL;
    4363              :             else
    4364            4 :               op_uses++;
    4365            4 :             break;
    4366            0 :           case alg_add_t2_m:
    4367            0 :           case alg_sub_t2_m:
    4368            0 :             op_uses++;
    4369              :             /* Fallthru.  */
    4370           28 :           case alg_shift:
    4371           28 :             if (synth_shift_p && alg.log[i])
    4372              :               return NULL;
    4373              :             break;
    4374              :           case alg_add_factor:
    4375              :           case alg_sub_factor:
    4376              :             return NULL;
    4377              :           default:
    4378              :             break;
    4379              :           }
    4380           13 :       if (variant == add_variant)
    4381            0 :         op_uses++;
    4382              :       /* When we'll synthesize more than a single use of the reduction
    4383              :          operand the reduction constraints are violated.  Avoid this
    4384              :          situation.  */
    4385           13 :       if (op_uses > 1)
    4386              :         return NULL;
    4387              :     }
    4388              : 
    4389       298176 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4390              :     return NULL;
    4391              : 
    4392        36030 :   tree accumulator;
    4393              : 
    4394              :   /* STMT holds the most recently built statement; none exists yet.  */
    4395        36030 :   gimple *stmt = NULL;
    4396              : 
    4397        36030 :   if (cast_to_unsigned_p)
    4398              :     {
    4399        12239 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4400        12239 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4401        12239 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
    4402        12239 :       op = tmp_op;
    4403              :     }
    4404              : 
    4405        36030 :   if (alg.op[0] == alg_zero)
    4406          205 :     accumulator = build_int_cst (multtype, 0);
    4407              :   else
    4408              :     accumulator = op;
    4409              : 
    4410        36030 :   bool needs_fixup = (variant == negate_variant)
    4411        36030 :                       || (variant == add_variant);
    4412              : 
    4413       143505 :   for (int i = 1; i < alg.ops; i++)
    4414              :     {
    4415       107475 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4416       107475 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4417       107475 :       tree tmp_var = NULL_TREE;
    4418              : 
    4419       107475 :       switch (alg.op[i])
    4420              :         {
    4421        64431 :         case alg_shift:
    4422        64431 :           if (synth_shift_p)
    4423            0 :             stmt
    4424            0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4425            0 :                                            alg.log[i], stmt_vinfo, vectype);
    4426              :           else
    4427        64431 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4428              :                                          shft_log);
    4429              :           break;
    4430        21707 :         case alg_add_t_m2:
    4431        21707 :           tmp_var
    4432        21707 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4433              :                                            stmt_vinfo, vectype, synth_shift_p);
    4434        21707 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4435              :                                        tmp_var);
    4436        21707 :           break;
    4437        15806 :         case alg_sub_t_m2:
    4438        15806 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4439              :                                                  shft_log, stmt_vinfo,
    4440              :                                                  vectype, synth_shift_p);
    4441              :           /* In some algorithms the first step involves zeroing the
    4442              :              accumulator.  If subtracting from such an accumulator
    4443              :              just emit the negation directly.  */
    4444        15806 :           if (integer_zerop (accumulator))
    4445          205 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4446              :           else
    4447        15601 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4448              :                                         tmp_var);
    4449              :           break;
    4450            0 :         case alg_add_t2_m:
    4451            0 :           tmp_var
    4452            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4453              :                                            shft_log, stmt_vinfo, vectype,
    4454              :                                            synth_shift_p);
    4455            0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4456            0 :           break;
    4457            0 :         case alg_sub_t2_m:
    4458            0 :           tmp_var
    4459            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4460              :                                            shft_log, stmt_vinfo, vectype,
    4461              :                                            synth_shift_p);
    4462            0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4463            0 :           break;
    4464         4766 :         case alg_add_factor:
    4465         4766 :           tmp_var
    4466         4766 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4467              :                                            shft_log, stmt_vinfo, vectype,
    4468              :                                            synth_shift_p);
    4469         4766 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4470              :                                        tmp_var);
    4471         4766 :           break;
    4472          765 :         case alg_sub_factor:
    4473          765 :           tmp_var
    4474          765 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4475              :                                            shft_log, stmt_vinfo, vectype,
    4476              :                                            synth_shift_p);
    4477          765 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4478              :                                       accumulator);
    4479          765 :           break;
    4480            0 :         default:
    4481            0 :           gcc_unreachable ();
    4482              :         }
    4483              :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4484              :          but rather return it directly.  */
    4485              : 
    4486       107475 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4487        83981 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
    4488       107475 :       accumulator = accum_tmp;
    4489              :     }
    4490        36030 :   if (variant == negate_variant)
    4491              :     {
    4492          429 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4493          429 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4494          429 :       accumulator = accum_tmp;
    4495          429 :       if (cast_to_unsigned_p)
    4496          142 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
    4497              :     }
    4498        35601 :   else if (variant == add_variant)
    4499              :     {
    4500           99 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4501           99 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4502           99 :       accumulator = accum_tmp;
    4503           99 :       if (cast_to_unsigned_p)
    4504           89 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
    4505              :     }
    4506              :   /* Convert the result back to ITYPE if we computed in an unsigned type.  */
    4507        35733 :   if (cast_to_unsigned_p)
    4508              :     {
    4509        12239 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4510        12239 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4511              :     }
    4512              : 
    4513              :   return stmt;
    4514              : }
    4515              : 
    4516              : /* Detect multiplication by constant and convert it into a sequence of
    4517              :    shifts and additions, subtractions, negations.  We reuse the
    4518              :    choose_mult_variant algorithms from expmed.cc.
    4519              : 
    4520              :    Input/Output:
    4521              : 
    4522              :    STMT_VINFO: The stmt from which the pattern search begins,
    4523              :    i.e. the mult stmt.
    4524              : 
    4525              :  Output:
    4526              : 
    4527              :   * TYPE_OUT: The type of the output of this pattern.
    4528              : 
    4529              :   * Return value: A new stmt that will be used to replace
    4530              :     the multiplication.  */
    4531              : 
    4532              : static gimple *
    4533     31048776 : vect_recog_mult_pattern (vec_info *vinfo,
    4534              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4535              : {
    4536     31048776 :   gimple *last_stmt = stmt_vinfo->stmt;
    4537     31048776 :   tree oprnd0, oprnd1, vectype, itype;
    4538     31048776 :   gimple *pattern_stmt;
    4539              : 
    4540     31048776 :   if (!is_gimple_assign (last_stmt))
    4541              :     return NULL;
    4542              : 
    4543     21325162 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4544              :     return NULL;
    4545              : 
    4546      1456951 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4547      1456951 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4548      1456951 :   itype = TREE_TYPE (oprnd0);
    4549              : 
    4550      1456951 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4551      1456888 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4552       907680 :       || !INTEGRAL_TYPE_P (itype)
    4553      2364631 :       || !type_has_mode_precision_p (itype))
    4554       549323 :     return NULL;
    4555              : 
    4556       907628 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4557       907628 :   if (vectype == NULL_TREE)
    4558              :     return NULL;
    4559              : 
    4560              :   /* If the target can handle vectorized multiplication natively,
    4561              :      don't attempt to optimize this.  */
    4562       737942 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4563       737942 :   if (mul_optab != unknown_optab
    4564       737942 :       && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    4565              :     return NULL;
    4566              : 
    4567       300918 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4568              :                                               oprnd0, oprnd1, stmt_vinfo);
    4569       300918 :   if (!pattern_stmt)
    4570              :     return NULL;
    4571              : 
    4572              :   /* Pattern detected.  */
    4573        36030 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4574              : 
    4575        36030 :   *type_out = vectype;
    4576              : 
    4577        36030 :   return pattern_stmt;
    4578              : }
    4579              : 
    4580              : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
    4581              : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
    4582              : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4583              : 
    4584              : extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
    4585              : 
    4586              : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
    4587              : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
    4588              : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4589              : 
    4590              : static gimple *
    4591          300 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
    4592              :                                      internal_fn fn, tree *type_out,
    4593              :                                      tree lhs, tree op_0, tree op_1)
    4594              : {
    4595          300 :   tree itype = TREE_TYPE (op_0);
    4596          300 :   tree otype = TREE_TYPE (lhs);
    4597          300 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4598          300 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4599              : 
    4600          300 :   if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4601          300 :     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    4602              :     {
    4603           97 :       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
    4604           97 :       tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
    4605              : 
    4606           97 :       gimple_call_set_lhs (call, in_ssa);
    4607           97 :       gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4608           97 :       gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
    4609              : 
    4610           97 :       *type_out = v_otype;
    4611              : 
    4612           97 :       if (types_compatible_p (itype, otype))
    4613              :         return call;
    4614              :       else
    4615              :         {
    4616            0 :           append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
    4617            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4618              : 
    4619            0 :           return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
    4620              :         }
    4621              :     }
    4622              : 
    4623              :   return NULL;
    4624              : }
    4625              : 
    4626              : /*
    4627              :  * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
    4628              :  *   _7 = _4 + _6;
    4629              :  *   _8 = _4 > _7;
    4630              :  *   _9 = (long unsigned int) _8;
    4631              :  *   _10 = -_9;
    4632              :  *   _12 = _7 | _10;
    4633              :  *
    4634              :  * And then simplified to
    4635              :  *   _12 = .SAT_ADD (_4, _6);
    4636              :  */
    4637              : 
    4638              : static gimple *
    4639     31119900 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4640              :                             tree *type_out)
    4641              : {
    4642     31119900 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4643              : 
    4644     31119900 :   if (!is_gimple_assign (last_stmt))
    4645              :     return NULL;
    4646              : 
    4647     21396286 :   tree ops[2];
    4648     21396286 :   tree lhs = gimple_assign_lhs (last_stmt);
    4649              : 
    4650     21396286 :   if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
    4651     21396286 :       || gimple_signed_integer_sat_add (lhs, ops, NULL))
    4652              :     {
    4653           62 :       if (TREE_CODE (ops[1]) == INTEGER_CST)
    4654           12 :         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
    4655              : 
    4656           62 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4657              :                                                           IFN_SAT_ADD, type_out,
    4658              :                                                           lhs, ops[0], ops[1]);
    4659           62 :       if (stmt)
    4660              :         {
    4661           44 :           vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
    4662           44 :           return stmt;
    4663              :         }
    4664              :     }
    4665              : 
    4666              :   return NULL;
    4667              : }
    4668              : 
    4669              : /*
    4670              :  * Try to transform the truncation of a .SAT_SUB pattern, which mostly occurs
    4671              :  * in the benchmark zip.  For example:
    4672              :  *
    4673              :  *   unsigned int _1;
    4674              :  *   unsigned int _2;
    4675              :  *   unsigned short int _4;
    4676              :  *   _9 = (unsigned short int).SAT_SUB (_1, _2);
    4677              :  *
    4678              :  *   If _1 is known to be in the range of unsigned short int, for example
    4679              :  *   because there is a def _1 = (unsigned int) _4, then we can transform
    4680              :  *   the truncation to:
    4681              :  *
    4682              :  *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
    4683              :  *   _9 = .SAT_SUB (_4, _3);
    4684              :  *
    4685              :  *   Then we can generate better vectorized code and avoid the unnecessary
    4686              :  *   narrowing stmt during vectorization, using the stmt(s) below.
    4687              :  *
    4688              :  *   _3 = .SAT_TRUNC(_2); // SI => HI
    4689              :  *   _9 = .SAT_SUB (_4, _3);
    4690              :  */
    4691              : static void
    4692          238 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
    4693              :                                       stmt_vec_info stmt_vinfo,
    4694              :                                       tree lhs, tree *ops)
    4695              : {
    4696          238 :   tree otype = TREE_TYPE (lhs);
    4697          238 :   tree itype = TREE_TYPE (ops[0]);
    4698          238 :   unsigned itype_prec = TYPE_PRECISION (itype);
    4699          238 :   unsigned otype_prec = TYPE_PRECISION (otype);
    4700              : 
    4701          238 :   if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
    4702          238 :     return;
    4703              : 
    4704            0 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4705            0 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4706            0 :   tree_pair v_pair = tree_pair (v_otype, v_itype);
    4707              : 
    4708            0 :   if (v_otype == NULL_TREE || v_itype == NULL_TREE
    4709            0 :     || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
    4710              :                                         OPTIMIZE_FOR_BOTH))
    4711            0 :     return;
    4712              : 
    4713              :   /* 1. Find the _4 and update ops[0] as above example.  */
    4714            0 :   vect_unpromoted_value unprom;
    4715            0 :   tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
    4716              : 
    4717            0 :   if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
    4718              :     return;
    4719              : 
    4720            0 :   ops[0] = tmp;
    4721              : 
    4722              :   /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
    4723            0 :   tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4724            0 :   gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
    4725              : 
    4726            0 :   gimple_call_set_lhs (call, trunc_lhs_ssa);
    4727            0 :   gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4728            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
    4729              : 
    4730            0 :   ops[1] = trunc_lhs_ssa;
    4731              : }
    4732              : 
    4733              : /*
    4734              :  * Try to detect saturation sub pattern (SAT_SUB), aka below gimple:
    4735              :  * Unsigned:
    4736              :  *   _7 = _1 >= _2;
    4737              :  *   _8 = _1 - _2;
    4738              :  *   _10 = (long unsigned int) _7;
    4739              :  *   _9 = _8 * _10;
    4740              :  *
    4741              :  * And then simplified to
    4742              :  *   _9 = .SAT_SUB (_1, _2);
    4743              :  *
    4744              :  * Signed:
    4745              :  *   x.0_4 = (unsigned char) x_16;
    4746              :  *   y.1_5 = (unsigned char) y_18;
    4747              :  *   _6 = x.0_4 - y.1_5;
    4748              :  *   minus_19 = (int8_t) _6;
    4749              :  *   _7 = x_16 ^ y_18;
    4750              :  *   _8 = x_16 ^ minus_19;
    4751              :  *   _44 = _7 < 0;
    4752              :  *   _23 = x_16 < 0;
    4753              :  *   _24 = (signed char) _23;
    4754              :  *   _58 = (unsigned char) _24;
    4755              :  *   _59 = -_58;
    4756              :  *   _25 = (signed char) _59;
    4757              :  *   _26 = _25 ^ 127;
    4758              :  *   _42 = _8 < 0;
    4759              :  *   _41 = _42 & _44;
    4760              :  *   iftmp.2_11 = _41 ? _26 : minus_19;
    4761              :  *
    4762              :  * And then simplified to
    4763              :  *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
    4764              :  */
    4765              : 
    4766              : static gimple *
    4767     31119856 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4768              :                             tree *type_out)
    4769              : {
    4770     31119856 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4771              : 
    4772     31119856 :   if (!is_gimple_assign (last_stmt))
    4773              :     return NULL;
    4774              : 
    4775     21396242 :   tree ops[2];
    4776     21396242 :   tree lhs = gimple_assign_lhs (last_stmt);
    4777              : 
    4778     21396242 :   if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
    4779     21396242 :       || gimple_signed_integer_sat_sub (lhs, ops, NULL))
    4780              :     {
    4781          238 :       vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
    4782          238 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4783              :                                                           IFN_SAT_SUB, type_out,
    4784              :                                                           lhs, ops[0], ops[1]);
    4785          238 :       if (stmt)
    4786              :         {
    4787           53 :           vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
    4788           53 :           return stmt;
    4789              :         }
    4790              :     }
    4791              : 
    4792              :   return NULL;
    4793              : }
    4794              : 
    4795              : /*
    4796              :  * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
    4797              :  *   overflow_5 = x_4(D) > 4294967295;
    4798              :  *   _1 = (unsigned int) x_4(D);
    4799              :  *   _2 = (unsigned int) overflow_5;
    4800              :  *   _3 = -_2;
    4801              :  *   _6 = _1 | _3;
    4802              :  *
    4803              :  * And then simplified to
    4804              :  *   _6 = .SAT_TRUNC (x_4(D));
    4805              :  */
    4806              : 
    4807              : static gimple *
    4808     31119803 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4809              :                               tree *type_out)
    4810              : {
    4811     31119803 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4812              :   /* Only GIMPLE assignments are candidates.  */
    4813     31119803 :   if (!is_gimple_assign (last_stmt))
    4814              :     return NULL;
    4815              : 
    4816     21396189 :   tree ops[1];
    4817     21396189 :   tree lhs = gimple_assign_lhs (last_stmt);
    4818     21396189 :   tree otype = TREE_TYPE (lhs);
    4819              :   /* Case 1: LHS matches gimple_unsigned_integer_narrow_clip and OTYPE has mode precision; the matched operand is read from ops[0].  */
    4820     21396189 :   if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
    4821     21396189 :        && type_has_mode_precision_p (otype))
    4822              :     {
    4823           16 :       tree itype = TREE_TYPE (ops[0]);
    4824           16 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4825           16 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4826           16 :       internal_fn fn = IFN_SAT_TRUNC;
    4827              :       /* Both vector types must exist and the target must support IFN_SAT_TRUNC for the (v_otype, v_itype) pair.  */
    4828           16 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4829           32 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4830              :                                            OPTIMIZE_FOR_BOTH))
    4831              :         {
    4832            0 :           tree temp = vect_recog_temp_ssa_var (itype, NULL);
    4833            0 :           gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
    4834            0 :           append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
    4835              :           /* The call consumes the MAX_EXPR (0, ops[0]) result rather than ops[0] itself.  */
    4836            0 :           gcall *call = gimple_build_call_internal (fn, 1, temp);
    4837            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4838              : 
    4839            0 :           gimple_call_set_lhs (call, out_ssa);
    4840            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4841            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4842              : 
    4843            0 :           *type_out = v_otype;
    4844              : 
    4845            0 :           return call;
    4846              :         }
    4847              : 
    4848              :     }
    4849              :   /* Case 2: unsigned or signed sat_trunc match; ops[0] is passed to IFN_SAT_TRUNC directly.  */
    4850     21396189 :   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    4851     21395866 :        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
    4852     21396189 :       && type_has_mode_precision_p (otype))
    4853              :     {
    4854          311 :       tree itype = TREE_TYPE (ops[0]);
    4855          311 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4856          311 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4857          311 :       internal_fn fn = IFN_SAT_TRUNC;
    4858              : 
    4859          305 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4860          616 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4861              :                                            OPTIMIZE_FOR_BOTH))
    4862              :         {
    4863            0 :           gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
    4864            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4865              : 
    4866            0 :           gimple_call_set_lhs (call, out_ssa);
    4867            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4868            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4869              : 
    4870            0 :           *type_out = v_otype;
    4871              : 
    4872            0 :           return call;
    4873              :         }
    4874              :     }
    4875              :   /* Neither form matched, or the vector types / target support were missing.  */
    4876              :   return NULL;
    4877              : }
    4878              : 
    4879              : 
    4880              : /* Function add_code_for_floorceilround_divmod
    4881              :    A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
    4882              :    FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
    4883              :    ROUND_DIV_EXPR
    4884              :    The quotient and remainder are needed for implementing these operators.
    4885              :    FLOOR cases
    4886              :    r = x %[fl] y; r = x/[fl] y;
    4887              :    is
    4888              :    r = x % y; if (r && (x ^ y) < 0) r += y;
    4889              :    r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
    4890              :    Produce following sequence
    4891              :    v0 = x^y
    4892              :    v1 = -r
    4893              :    v2 = r | -r
    4894              :    v3 = v0 & v2
    4895              :    v4 = v3 < 0
    4896              :    if (floor_mod)
    4897              :      v5 = v4 ? y : 0
    4898              :      v6 = r + v5
    4899              :    if (floor_div)
    4900              :      v5 = v4 ? 1 : 0
    4901              :      v6 = d - v5
    4902              :    Similar sequences of vector instructions are produced for the following cases
    4903              :    CEIL cases
    4904              :    r = x %[cl] y; r = x/[cl] y;
    4905              :    is
    4906              :    r = x % y; if (r && (x ^ y) >= 0) r -= y;
    4907              :    r = x % y; if (r) r -= y; (unsigned)
    4908              :    r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
    4909              :    r = x % y; d = x/y; if (r) d++; (unsigned)
    4910              :    ROUND cases
    4911              :    r = x %[rd] y; r = x/[rd] y;
    4912              :    is
    4913              :    r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
    4914              :    r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
    4915              :    r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
    4916              :    r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
    4917              :    Inputs:
    4918              :      VECTYPE: Vector type of the operands
    4919              :      STMT_VINFO: Statement where pattern begins
    4920              :      RHS_CODE: One of the FLOOR/CEIL/ROUND variants of MOD_EXPR or DIV_EXPR
    4921              :      Q: The quotient of division
    4922              :      R: Remainder of division
    4923              :      OPRND0/OPRND1: Actual operands involved
    4924              :      ITYPE: tree type of oprnd0
    4925              :    Output:
    4926              :      NULL if vectorization not possible
    4927              :      Gimple statement based on rhs_code
    4928              : */
    4929              : static gimple *
    4930          431 : add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
    4931              :                                     stmt_vec_info stmt_vinfo,
    4932              :                                     enum tree_code rhs_code, tree q, tree r,
    4933              :                                     tree oprnd0, tree oprnd1, tree itype)
    4934              : {
    4935          431 :   gimple *def_stmt;
    4936          431 :   tree mask_vectype = truth_type_for (vectype);
    4937          431 :   if (!mask_vectype)
    4938              :     return NULL;
    4939          431 :   tree bool_cond;
    4940          431 :   bool unsigned_p = TYPE_UNSIGNED (itype);
    4941              :   /* Dispatch on the rounding mode; each arm starts with target capability checks.  */
    4942          431 :   switch (rhs_code)
    4943              :     {
    4944          395 :     case FLOOR_MOD_EXPR:
    4945          395 :     case FLOOR_DIV_EXPR:
    4946          395 :     case CEIL_MOD_EXPR:
    4947          395 :     case CEIL_DIV_EXPR:
    4948          395 :       {
    4949          395 :         if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
    4950          363 :             || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
    4951          363 :             || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
    4952          363 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    4953          363 :             || !target_has_vecop_for_code (MINUS_EXPR, vectype)
    4954          363 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    4955          631 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    4956          159 :           return NULL;
    4957          236 :         if (unsigned_p)
    4958              :           {
    4959           18 :             gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4960              : 
    4961           18 :             if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
    4962              :               return NULL;
    4963           18 :             bool is_mod = rhs_code == CEIL_MOD_EXPR;
    4964              :             // r > 0
    4965           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    4966           18 :             def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
    4967              :                                             build_int_cst (itype, 0));
    4968           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    4969              :                                     itype);
    4970              : 
    4971              :             // (r > 0) ? y : 0 (mod)
    4972              :             // (r > 0) ? 1 : 0 (div)
    4973           18 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    4974           18 :             def_stmt
    4975           27 :               = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    4976            9 :                                      is_mod ? oprnd1 : build_int_cst (itype, 1),
    4977              :                                      build_int_cst (itype, 0));
    4978           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4979              : 
    4980              :             // r -= (r > 0) ? y : 0 (mod)
    4981              :             // d += (r > 0) ? 1 : 0 (div)
    4982           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    4983           27 :             return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
    4984           18 :                                         is_mod ? r : q, extr_cond);
    4985              :           }
    4986              :         else
    4987              :           {
    4988          218 :             bool ceil_p
    4989          218 :               = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4990          218 :             if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
    4991              :               return NULL;
    4992              :             // x ^ y
    4993          218 :             tree xort = vect_recog_temp_ssa_var (itype, NULL);
    4994          218 :             def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
    4995          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4996              : 
    4997          218 :             tree cond_reg = xort;
    4998              :             // ~(x ^ y) (ceil)
    4999          218 :             if (ceil_p)
    5000              :               {
    5001           18 :                 cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    5002           18 :                 def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
    5003           18 :                 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5004              :               }
    5005              : 
    5006              :             // -r
    5007          218 :             tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
    5008          218 :             def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
    5009          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5010              : 
    5011              :             // r | -r , sign bit is set if r!=0
    5012          218 :             tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
    5013          218 :             def_stmt
    5014          218 :               = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
    5015          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5016              : 
    5017              :             // (x ^ y) & (r | -r)
    5018              :             // ~(x ^ y) & (r | -r) (ceil)
    5019          218 :             tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
    5020          218 :             def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
    5021              :                                             r_or_negr, cond_reg);
    5022          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5023              : 
    5024              :             // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
    5025          218 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5026          218 :             def_stmt
    5027          218 :               = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
    5028              :                                      build_int_cst (itype, 0));
    5029          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5030              :                                     itype);
    5031              : 
    5032              :             // (x^y < 0 && r) ? y : 0 (mod)
    5033              :             // (x^y < 0 && r) ? -1 : 0 (div)
    5034          218 :             bool is_mod
    5035          218 :               = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
    5036          218 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    5037          258 :             def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    5038              :                                             is_mod ? oprnd1
    5039           40 :                                                    : build_int_cst (itype, -1),
    5040              :                                             build_int_cst (itype, 0));
    5041          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5042              : 
    5043              :             // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
    5044              :             // d += (x^y < 0 && r) ? -1 : 0 (floor div)
    5045              :             // r -= (x ^ y >= 0 && r) ? y : 0 (ceil mod)
    5046              :             // d -= (x ^ y >= 0 && r) ? -1 : 0 (ceil div)
    5047          218 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5048          436 :             return gimple_build_assign (result,
    5049          218 :                                         (rhs_code == FLOOR_MOD_EXPR
    5050          218 :                                          || rhs_code == FLOOR_DIV_EXPR)
    5051              :                                           ? PLUS_EXPR
    5052              :                                           : MINUS_EXPR,
    5053          218 :                                         is_mod ? r : q, extr_cond);
    5054              :           }
    5055              :       }
    5056           36 :     case ROUND_MOD_EXPR:
    5057           36 :     case ROUND_DIV_EXPR:
    5058           36 :       {
    5059           36 :         if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
    5060           36 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    5061           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    5062           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
    5063           72 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    5064            0 :           return NULL;
    5065              :         /* OPRND1 is read as a host integer D; MID_D = (|D| - 1) >> 1 is the threshold the remainder is compared against.  */
    5066           36 :         bool is_mod = rhs_code == ROUND_MOD_EXPR;
    5067           36 :         HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5068           36 :         unsigned HOST_WIDE_INT abs_d
    5069              :           = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
    5070           36 :         unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
    5071           36 :         if (!unsigned_p)
    5072              :           {
    5073              :             // check availability of abs expression for vector
    5074           18 :             if (!target_has_vecop_for_code (ABS_EXPR, vectype))
    5075              :               return NULL;
    5076              :             // abs (r)
    5077           18 :             tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
    5078           18 :             def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
    5079           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5080              : 
    5081              :             // abs (r) > ((abs (y) - 1) >> 1)
    5082           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5083           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
    5084           18 :                                             build_int_cst (itype, mid_d));
    5085           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5086              :                                     itype);
    5087              : 
    5088              :             // x ^ y
    5089           18 :             tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    5090           18 :             def_stmt
    5091           18 :               = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
    5092           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5093              : 
    5094              :             // x ^ y < 0
    5095           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5096           18 :             def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
    5097              :                                             build_int_cst (itype, 0));
    5098           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5099              :                                     itype);
    5100              : 
    5101              :             // x ^ y < 0 ? y : -y (mod)
    5102              :             // x ^ y < 0 ? -1 : 1 (div)
    5103           18 :             tree val1 = vect_recog_temp_ssa_var (itype, NULL);
    5104           18 :             def_stmt
    5105           36 :               = gimple_build_assign (val1, COND_EXPR, bool_cond,
    5106           27 :                                      build_int_cst (itype, is_mod ? d : -1),
    5107           18 :                                      build_int_cst (itype, is_mod ? -d : 1));
    5108           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5109           18 :             int precision = TYPE_PRECISION (itype);
    5110           18 :             wide_int wmask = wi::mask (precision, false, precision);
    5111              : 
    5112              :             // abs (r) > ((abs (y) - 1) >> 1) ? all-ones : 0
    5113           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5114           36 :             def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
    5115           18 :                                             wide_int_to_tree (itype, wmask),
    5116              :                                             build_int_cst (itype, 0));
    5117           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5118              : 
    5119           18 :             tree fval = vect_recog_temp_ssa_var (itype, NULL);
    5120           18 :             def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
    5121           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5122              : 
    5123           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5124           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5125              :                                         fval);
    5126           18 :           }
    5127              :         else
    5128              :           {
    5129              :             // r > ((y - 1) >> 1)
    5130           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5131           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
    5132           18 :                                             build_int_cst (itype, mid_d));
    5133           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5134              :                                     itype);
    5135              : 
    5136              :             // (r > ((y - 1) >> 1)) ? (mod ? -y : 1) : 0
    5137           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5138           18 :             def_stmt
    5139           36 :               = gimple_build_assign (val2, COND_EXPR, round_p,
    5140           18 :                                      build_int_cst (itype, is_mod ? -d : 1),
    5141              :                                      build_int_cst (itype, 0));
    5142           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5143              : 
    5144           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5145           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5146           18 :                                         val2);
    5147              :           }
    5148              :       }
    5149              :     default:
    5150              :       return NULL;
    5151              :     }
    5152              : }
    5153              : 
    5154              : /* Detect a signed division by a constant that wouldn't be
    5155              :    otherwise vectorized:
    5156              : 
    5157              :    type a_t, b_t;
    5158              : 
    5159              :    S1 a_t = b_t / N;
    5160              : 
    5161              :   where type 'type' is an integral type and N is a constant.
    5162              : 
    5163              :   Similarly handle modulo by a constant:
    5164              : 
    5165              :    S4 a_t = b_t % N;
    5166              : 
    5167              :   Input/Output:
    5168              : 
    5169              :   * STMT_VINFO: The stmt from which the pattern search begins,
    5170              :     i.e. the division stmt.  If N is a power of two constant and
    5171              :     type is signed, S1 is replaced by:
    5172              :   S3  y_t = b_t < 0 ? N - 1 : 0;
    5173              :   S2  x_t = b_t + y_t;
    5174              :   S1' a_t = x_t >> log2 (N);
    5175              : 
    5176              :     S4 is replaced if N is a power of two constant and
    5177              :     type is signed by (where *_T temporaries have unsigned type):
    5178              :   S9  y_T = b_t < 0 ? -1U : 0U;
    5179              :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    5180              :   S7  z_t = (type) z_T;
    5181              :   S6  w_t = b_t + z_t;
    5182              :   S5  x_t = w_t & (N - 1);
    5183              :   S4' a_t = x_t - z_t;
    5184              : 
    5185              :   Output:
    5186              : 
    5187              :   * TYPE_OUT: The type of the output of this pattern.
    5188              : 
    5189              :   * Return value: A new stmt that will be used to replace the division
    5190              :     S1 or modulo S4 stmt.  */
    5191              : 
    5192              : static gimple *
    5193     30855513 : vect_recog_divmod_pattern (vec_info *vinfo,
    5194              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    5195              : {
    5196     30855513 :   gimple *last_stmt = stmt_vinfo->stmt;
    5197     30855513 :   tree oprnd0, oprnd1, vectype, itype, cond;
    5198     30855513 :   gimple *pattern_stmt = NULL;
    5199     30855513 :   gimple *def_stmt = NULL;
    5200     30855513 :   enum tree_code rhs_code;
    5201     30855513 :   optab optab;
    5202     30855513 :   tree q, cst;
    5203     30855513 :   int prec;
    5204              : 
    5205     30855513 :   if (!is_gimple_assign (last_stmt)
    5206              :       /* The pattern will disrupt the reduction chain with multiple uses.  */
    5207     30855513 :       || vect_is_reduction (stmt_vinfo))
    5208              :     return NULL;
    5209              : 
    5210     21023382 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5211     21023382 :   switch (rhs_code)
    5212              :     {
    5213       276985 :     case TRUNC_DIV_EXPR:
    5214       276985 :     case EXACT_DIV_EXPR:
    5215       276985 :     case TRUNC_MOD_EXPR:
    5216       276985 :     case FLOOR_MOD_EXPR:
    5217       276985 :     case FLOOR_DIV_EXPR:
    5218       276985 :     case CEIL_MOD_EXPR:
    5219       276985 :     case CEIL_DIV_EXPR:
    5220       276985 :     case ROUND_MOD_EXPR:
    5221       276985 :     case ROUND_DIV_EXPR:
    5222       276985 :       break;
    5223              :     default:
    5224              :       return NULL;
    5225              :     }
    5226              : 
    5227       276985 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5228       276985 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5229       276985 :   itype = TREE_TYPE (oprnd0);
    5230       276985 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5231       259379 :       || TREE_CODE (oprnd1) != INTEGER_CST
    5232       165242 :       || TREE_CODE (itype) != INTEGER_TYPE
    5233       442227 :       || !type_has_mode_precision_p (itype))
    5234       111743 :     return NULL;
    5235              : 
    5236       165242 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    5237       165242 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5238       165242 :   if (vectype == NULL_TREE)
    5239              :     return NULL;
    5240              : 
    5241       132566 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    5242              :     {
    5243              :       /* If the target can handle vectorized division or modulo natively,
    5244              :          don't attempt to optimize this, since native division is likely
    5245              :          to give smaller code.  */
    5246         2224 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    5247         2224 :       if (optab != unknown_optab
    5248         2224 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    5249              :         return NULL;
    5250              :     }
    5251              : 
    5252       132566 :   prec = TYPE_PRECISION (itype);
    5253              : 
    5254       265132 :   bool is_flclrd_moddiv_p
    5255       132566 :     = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
    5256              :     || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
    5257       131955 :     || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
    5258       132566 :   if (integer_pow2p (oprnd1))
    5259              :     {
    5260        79972 :       if ((TYPE_UNSIGNED (itype)
    5261           57 :            && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
    5262        80026 :           || tree_int_cst_sgn (oprnd1) != 1)
    5263            3 :         return NULL;
    5264              : 
    5265              :       /* Pattern detected.  */
    5266        79969 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5267              : 
    5268        79969 :       *type_out = vectype;
    5269              : 
    5270              :       /* Check if the target supports this internal function.  */
    5271        79969 :       internal_fn ifn = IFN_DIV_POW2;
    5272        79969 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    5273              :         {
    5274            0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    5275              : 
    5276            0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    5277            0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    5278            0 :           gimple_call_set_lhs (div_stmt, var_div);
    5279            0 :           if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5280              :             {
    5281            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    5282            0 :               tree t1 = vect_recog_temp_ssa_var (itype, NULL);
    5283            0 :               def_stmt
    5284            0 :                 = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
    5285            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5286            0 :               pattern_stmt
    5287            0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5288              :                                        MINUS_EXPR, oprnd0, t1);
    5289            0 :               if (is_flclrd_moddiv_p)
    5290              :                 {
    5291            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5292            0 :                   pattern_stmt
    5293            0 :                     = add_code_for_floorceilround_divmod (vectype, vinfo,
    5294              :                                                           stmt_vinfo, rhs_code,
    5295              :                                                           var_div, t1, oprnd0,
    5296              :                                                           oprnd1, itype);
    5297            0 :                   if (pattern_stmt == NULL)
    5298              :                     return NULL;
    5299              :                 }
    5300              :             }
    5301              :           else
    5302              :             pattern_stmt = div_stmt;
    5303            0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    5304              : 
    5305            0 :           return pattern_stmt;
    5306              :         }
    5307              : 
    5308        79969 :       cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5309        79969 :       def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
    5310              :                                       build_int_cst (itype, 0));
    5311        79969 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
    5312              :                               truth_type_for (vectype), itype);
    5313        79969 :       tree div_result = NULL_TREE;
    5314        79969 :       if (rhs_code == TRUNC_DIV_EXPR
    5315        79969 :           || rhs_code == EXACT_DIV_EXPR
    5316              :           || rhs_code == FLOOR_DIV_EXPR
    5317         2689 :           || rhs_code == CEIL_DIV_EXPR
    5318         2530 :           || rhs_code == ROUND_DIV_EXPR)
    5319              :         {
    5320        77451 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    5321        77451 :           tree shift;
    5322        77451 :           def_stmt
    5323        77451 :             = gimple_build_assign (var, COND_EXPR, cond,
    5324              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5325              :                                                 build_int_cst (itype, 1)),
    5326              :                                    build_int_cst (itype, 0));
    5327        77451 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5328        77451 :           var = vect_recog_temp_ssa_var (itype, NULL);
    5329        77451 :           def_stmt
    5330        77451 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    5331              :                                    gimple_assign_lhs (def_stmt));
    5332        77451 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5333              : 
    5334        77451 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    5335        77451 :           div_result = vect_recog_temp_ssa_var (itype, NULL);
    5336        77451 :           pattern_stmt
    5337        77451 :             = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
    5338              :         }
    5339        79969 :       if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5340              :         {
    5341         2689 :           if (rhs_code == FLOOR_DIV_EXPR
    5342              :               || rhs_code == CEIL_DIV_EXPR
    5343         2689 :               || rhs_code == ROUND_DIV_EXPR)
    5344          171 :             append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5345              : 
    5346         2689 :           tree signmask;
    5347         2689 :           if (compare_tree_int (oprnd1, 2) == 0)
    5348              :             {
    5349         1283 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5350         1283 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    5351              :                                               build_int_cst (itype, 1),
    5352              :                                               build_int_cst (itype, 0));
    5353         1283 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5354              :             }
    5355              :           else
    5356              :             {
    5357         1406 :               tree utype
    5358         1406 :                 = build_nonstandard_integer_type (prec, 1);
    5359         1406 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    5360         1406 :               tree shift
    5361         1406 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    5362         1406 :                                         - tree_log2 (oprnd1));
    5363         1406 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    5364              : 
    5365         1406 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    5366              :                                               build_int_cst (utype, -1),
    5367              :                                               build_int_cst (utype, 0));
    5368         1406 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5369         1406 :               var = vect_recog_temp_ssa_var (utype, NULL);
    5370         1406 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    5371              :                                               gimple_assign_lhs (def_stmt),
    5372              :                                               shift);
    5373         1406 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5374         1406 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5375         1406 :               def_stmt
    5376         1406 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    5377         1406 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5378              :             }
    5379         2689 :           def_stmt
    5380         2689 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5381              :                                    PLUS_EXPR, oprnd0, signmask);
    5382         2689 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5383         2689 :           def_stmt
    5384         2689 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5385              :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    5386              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5387              :                                                 build_int_cst (itype, 1)));
    5388         2689 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5389              : 
    5390         2689 :           tree r = vect_recog_temp_ssa_var (itype, NULL);
    5391         2689 :           pattern_stmt
    5392         2689 :             = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
    5393              :                                    signmask);
    5394         2689 :           if (is_flclrd_moddiv_p)
    5395              :             {
    5396          285 :               append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5397          285 :               pattern_stmt
    5398          285 :                 = add_code_for_floorceilround_divmod (vectype, vinfo,
    5399              :                                                       stmt_vinfo, rhs_code,
    5400              :                                                       div_result, r, oprnd0,
    5401              :                                                       oprnd1, itype);
    5402          285 :               if (pattern_stmt == NULL)
    5403              :                 return NULL;
    5404              :             }
    5405              :         }
    5406              : 
    5407        79810 :       return pattern_stmt;
    5408              :     }
    5409              : 
    5410        52594 :   if ((cst = uniform_integer_cst_p (oprnd1))
    5411        52594 :       && TYPE_UNSIGNED (itype)
    5412              :       && rhs_code == TRUNC_DIV_EXPR
    5413        29441 :       && vectype
    5414        70311 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    5415              :     {
    5416              :       /* We can use the relationship:
    5417              : 
    5418              :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    5419              : 
    5420              :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    5421              :          is therefore a shift right.  When operating in modes that are
    5422              :          multiples of a byte in size, there are two cases:
    5423              : 
    5424              :          (1) N(N+3) is not representable, in which case the question
    5425              :              becomes whether the replacement expression overflows.
    5426              :              It is enough to test that x+N+2 does not overflow,
    5427              :              i.e. that x < MAX-(N+1).
    5428              : 
    5429              :          (2) N(N+3) is representable, in which case it is the (only)
    5430              :              bound that we need to check.
    5431              : 
    5432              :          ??? For now we just handle the case where // (N+1) is a shift
    5433              :          right by half the precision, since some architectures can
    5434              :          optimize the associated addition and shift combinations
    5435              :          into single instructions.  */
    5436              : 
    5437        12023 :       auto wcst = wi::to_wide (cst);
    5438        12023 :       int pow = wi::exact_log2 (wcst + 1);
    5439        12023 :       if (pow == prec / 2)
    5440              :         {
    5441          472 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    5442              : 
    5443          472 :           gimple_ranger ranger;
    5444          472 :           int_range_max r;
    5445              : 
    5446              :           /* Check that no overflow will occur.  If we don't have range
    5447              :              information we can't perform the optimization.  */
    5448              : 
    5449          472 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    5450              :             {
    5451          470 :               wide_int max = r.upper_bound ();
    5452          470 :               wide_int one = wi::shwi (1, prec);
    5453          470 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    5454          470 :               wi::overflow_type ovf;
    5455          470 :               wi::add (max, adder, UNSIGNED, &ovf);
    5456          470 :               if (ovf == wi::OVF_NONE)
    5457              :                 {
    5458          313 :                   *type_out = vectype;
    5459          313 :                   tree tadder = wide_int_to_tree (itype, adder);
    5460          313 :                   tree rshift = wide_int_to_tree (itype, pow);
    5461              : 
    5462          313 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    5463          313 :                   gassign *patt1
    5464          313 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    5465          313 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5466              : 
    5467          313 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    5468          313 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    5469              :                                                rshift);
    5470          313 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5471              : 
    5472          313 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    5473          313 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    5474              :                                                oprnd0);
    5475          313 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5476              : 
    5477          313 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    5478          313 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    5479              :                                                       new_lhs3, rshift);
    5480              : 
    5481          313 :                   return pattern_stmt;
    5482              :                 }
    5483          470 :             }
    5484          472 :         }
    5485              :     }
    5486              : 
    5487        52281 :   if (prec > HOST_BITS_PER_WIDE_INT
    5488        52281 :       || integer_zerop (oprnd1))
    5489          262 :     return NULL;
    5490              : 
    5491        52019 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    5492              :     return NULL;
    5493              : 
    5494        14109 :   if (TYPE_UNSIGNED (itype))
    5495              :     {
    5496         8742 :       unsigned HOST_WIDE_INT mh, ml;
    5497         8742 :       int pre_shift, post_shift;
    5498         8742 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    5499         8742 :                                   & GET_MODE_MASK (itype_mode));
    5500         8742 :       tree t1, t2, t3, t4;
    5501              : 
    5502         8742 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    5503              :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    5504           23 :         return NULL;
    5505              : 
    5506              :       /* Find a suitable multiplier and right shift count instead of
    5507              :          directly dividing by D.  */
    5508         8719 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
    5509              : 
    5510              :       /* If the suggested multiplier is more than PREC bits, we can do better
    5511              :          for even divisors, using an initial right shift.  */
    5512         8719 :       if (mh != 0 && (d & 1) == 0)
    5513              :         {
    5514          248 :           pre_shift = ctz_or_zero (d);
    5515          248 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    5516              :                                   &ml, &post_shift);
    5517          248 :           gcc_assert (!mh);
    5518              :         }
    5519              :       else
    5520              :         pre_shift = 0;
    5521              : 
    5522          634 :       if (mh != 0)
    5523              :         {
    5524          634 :           if (post_shift - 1 >= prec)
    5525              :             return NULL;
    5526              : 
    5527              :           /* t1 = oprnd0 h* ml;
    5528              :              t2 = oprnd0 - t1;
    5529              :              t3 = t2 >> 1;
    5530              :              t4 = t1 + t3;
    5531              :              q = t4 >> (post_shift - 1);  */
    5532          634 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    5533          634 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5534          634 :                                           build_int_cst (itype, ml));
    5535          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5536              : 
    5537          634 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5538          634 :           def_stmt
    5539          634 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    5540          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5541              : 
    5542          634 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5543          634 :           def_stmt
    5544          634 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    5545          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5546              : 
    5547          634 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5548          634 :           def_stmt
    5549          634 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    5550              : 
    5551          634 :           if (post_shift != 1)
    5552              :             {
    5553          634 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5554              : 
    5555          634 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5556          634 :               pattern_stmt
    5557          634 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    5558          634 :                                        build_int_cst (itype, post_shift - 1));
    5559              :             }
    5560              :           else
    5561              :             {
    5562              :               q = t4;
    5563              :               pattern_stmt = def_stmt;
    5564              :             }
    5565              :         }
    5566              :       else
    5567              :         {
    5568         8085 :           if (pre_shift >= prec || post_shift >= prec)
    5569              :             return NULL;
    5570              : 
    5571              :           /* t1 = oprnd0 >> pre_shift;
    5572              :              t2 = t1 h* ml;
    5573              :              q = t2 >> post_shift;  */
    5574         8085 :           if (pre_shift)
    5575              :             {
    5576          248 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    5577          248 :               def_stmt
    5578          248 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    5579          248 :                                        build_int_cst (NULL, pre_shift));
    5580          248 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5581              :             }
    5582              :           else
    5583              :             t1 = oprnd0;
    5584              : 
    5585         8085 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5586         8085 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    5587         8085 :                                           build_int_cst (itype, ml));
    5588              : 
    5589         8085 :           if (post_shift)
    5590              :             {
    5591         8075 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5592              : 
    5593         8075 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5594         8075 :               def_stmt
    5595         8075 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    5596         8075 :                                        build_int_cst (itype, post_shift));
    5597              :             }
    5598              :           else
    5599              :             q = t2;
    5600              : 
    5601              :           pattern_stmt = def_stmt;
    5602              :         }
    5603              :     }
    5604              :   else
    5605              :     {
    5606         5367 :       unsigned HOST_WIDE_INT ml;
    5607         5367 :       int post_shift;
    5608         5367 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5609         5367 :       unsigned HOST_WIDE_INT abs_d;
    5610         5367 :       bool add = false;
    5611         5367 :       tree t1, t2, t3, t4;
    5612              : 
    5613              :       /* Give up for -1.  */
    5614         5367 :       if (d == -1)
    5615            0 :         return NULL;
    5616              : 
    5617              :       /* Since d might be INT_MIN, we have to cast to
    5618              :          unsigned HOST_WIDE_INT before negating to avoid
    5619              :          undefined signed overflow.  */
    5620         5367 :       abs_d = (d >= 0
    5621         5367 :                ? (unsigned HOST_WIDE_INT) d
    5622              :                : - (unsigned HOST_WIDE_INT) d);
    5623              : 
    5624              :       /* n rem d = n rem -d */
    5625         5367 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    5626              :         {
    5627            0 :           d = abs_d;
    5628            0 :           oprnd1 = build_int_cst (itype, abs_d);
    5629              :         }
    5630         5367 :       if (HOST_BITS_PER_WIDE_INT >= prec
    5631         5367 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    5632              :         /* This case is not handled correctly below.  */
    5633              :         return NULL;
    5634              : 
    5635         5367 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
    5636         5367 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    5637              :         {
    5638         1586 :           add = true;
    5639         1586 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    5640              :         }
    5641         5367 :       if (post_shift >= prec)
    5642              :         return NULL;
    5643              : 
    5644              :       /* t1 = oprnd0 h* ml;  */
    5645         5367 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5646         5367 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5647         5367 :                                       build_int_cst (itype, ml));
    5648              : 
    5649         5367 :       if (add)
    5650              :         {
    5651              :           /* t2 = t1 + oprnd0;  */
    5652         1586 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5653         1586 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5654         1586 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    5655              :         }
    5656              :       else
    5657              :         t2 = t1;
    5658              : 
    5659         5367 :       if (post_shift)
    5660              :         {
    5661              :           /* t3 = t2 >> post_shift;  */
    5662         4559 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5663         4559 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5664         4559 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    5665         4559 :                                           build_int_cst (itype, post_shift));
    5666              :         }
    5667              :       else
    5668              :         t3 = t2;
    5669              : 
    5670         5367 :       int msb = 1;
    5671         5367 :       int_range_max r;
    5672        10734 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    5673         5367 :       if (!r.varying_p () && !r.undefined_p ())
    5674              :         {
    5675         2966 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    5676              :             msb = 0;
    5677          738 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    5678              :             msb = -1;
    5679              :         }
    5680              : 
    5681         2228 :       if (msb == 0 && d >= 0)
    5682              :         {
    5683              :           /* q = t3;  */
    5684              :           q = t3;
    5685              :           pattern_stmt = def_stmt;
    5686              :         }
    5687              :       else
    5688              :         {
    5689              :           /* t4 = oprnd0 >> (prec - 1);
    5690              :              or if we know from VRP that oprnd0 >= 0
    5691              :              t4 = 0;
    5692              :              or if we know from VRP that oprnd0 < 0
    5693              :              t4 = -1;  */
    5694         3199 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5695         3199 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5696         3199 :           if (msb != 1)
    5697           68 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5698           68 :                                             build_int_cst (itype, msb));
    5699              :           else
    5700         3131 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5701         3131 :                                             build_int_cst (itype, prec - 1));
    5702         3199 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5703              : 
    5704              :           /* q = t3 - t4;  or q = t4 - t3;  */
    5705         3199 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5706         6218 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5707              :                                               d < 0 ? t3 : t4);
    5708              :         }
    5709         5367 :     }
    5710              : 
    5711        14086 :   if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5712              :     {
    5713         6752 :       tree r, t1;
    5714              : 
    5715              :       /* We divided.  Now finish by:
    5716              :          t1 = q * oprnd1;
    5717              :          r = oprnd0 - t1;  */
    5718         6752 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5719              : 
    5720         6752 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5721         6752 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5722         6752 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5723              : 
    5724         6752 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5725         6752 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5726              : 
    5727         6752 :       if (is_flclrd_moddiv_p)
    5728              :         {
    5729          146 :         append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5730          146 :         pattern_stmt
    5731          146 :           = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
    5732              :                                                 rhs_code, q, r, oprnd0, oprnd1,
    5733              :                                                 itype);
    5734          146 :         if (pattern_stmt == NULL)
    5735              :           return NULL;
    5736              :         }
    5737              :     }
    5738              : 
    5739              :   /* Pattern detected.  */
    5740        14086 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5741              : 
    5742        14086 :   *type_out = vectype;
    5743        14086 :   return pattern_stmt;
    5744              : }
    5745              : 
    5746              : /* Detects a TRUNC_MOD_EXPR statement (S1) whose two operands are both
    5747              :    SSA names of INTEGER_TYPE.  Applies only when target_has_vecop_for_code
    5748              :    rejects TRUNC_MOD_EXPR but accepts TRUNC_DIV_EXPR, MULT_EXPR and
    5749              :    MINUS_EXPR for the vector type.  S2 and S3 are appended to the pattern
    5750              :    def sequence, S4 is returned and *TYPE_OUT is set; else returns NULL.
    5751              :    Example:
    5752              :    S1 c_t = a_t % b_t;
    5753              : 
    5754              :    is replaced by
    5755              :    S2 x_t = a_t / b_t;
    5756              :    S3 y_t = x_t * b_t;
    5757              :    S4 z_t = a_t - y_t;  */
    5758              : 
    5759              : static gimple *
    5760     31048776 : vect_recog_mod_var_pattern (vec_info *vinfo,
    5761              :                             stmt_vec_info stmt_vinfo, tree *type_out)
    5762              : {
    5763     31048776 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5764     31048776 :   tree oprnd0, oprnd1, vectype, itype;
    5765     31048776 :   gimple *pattern_stmt, *def_stmt;
    5766     31048776 :   enum tree_code rhs_code;
    5767              : 
    5768     31048776 :   if (!is_gimple_assign (last_stmt) || vect_is_reduction (stmt_vinfo))
    5769              :     return NULL;
    5770              : 
    5771     21216645 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5772     21216645 :   if (rhs_code != TRUNC_MOD_EXPR)
    5773              :     return NULL;
    5774              : 
    5775        68865 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5776        68865 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5777        68865 :   itype = TREE_TYPE (oprnd0);
    5778        68865 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5779        60542 :       || TREE_CODE (oprnd1) != SSA_NAME
    5780        44050 :       || TREE_CODE (itype) != INTEGER_TYPE)
    5781              :     return NULL;
    5782              : 
    5783        43923 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5784              : 
    5785        43923 :   if (!vectype
    5786        35845 :       || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
    5787        35845 :       || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
    5788            0 :       || !target_has_vecop_for_code (MULT_EXPR, vectype)
    5789        43923 :       || !target_has_vecop_for_code (MINUS_EXPR, vectype))
    5790        43923 :     return NULL;
    5791              : 
    5792            0 :   tree q, tmp, r;
    5793            0 :   q = vect_recog_temp_ssa_var (itype, NULL);
    5794            0 :   def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);  /* S2 */
    5795            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5796              : 
    5797            0 :   tmp = vect_recog_temp_ssa_var (itype, NULL);
    5798            0 :   def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);  /* S3 */
    5799            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5800              : 
    5801            0 :   r = vect_recog_temp_ssa_var (itype, NULL);
    5802            0 :   pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);  /* S4 */
    5803              : 
    5804              :   /* Pattern detected.  */
    5805            0 :   *type_out = vectype;
    5806            0 :   vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
    5807              : 
    5808            0 :   return pattern_stmt;
    5809              : }
    5810              : 
    5811              : 
    5812              : /* Return the proper type for converting bool VAR into an integer value,
    5813              :    chosen so that the converted value has the same number of elements as
    5814              :    VAR's vector type, or NULL_TREE if no such type exists.  If DT is
    5815              :    nonnull and VAR is boolean, *DT receives the def type found for VAR.  */
    5816              : 
    5817              : static tree
    5818      4426723 : integer_type_for_mask (tree var, vec_info *vinfo, vect_def_type *dt = nullptr)
    5819              : {
    5820      4426723 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5821              :     return NULL_TREE;
    5822              : 
    5823      2013877 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (var);
    5824      2013877 :   if (dt)
    5825              :     {
    5826       342400 :       if (!def_stmt_info)
    5827         3401 :         *dt = vect_external_def;  /* VINFO has no def stmt for VAR.  */
    5828              :       else
    5829       338999 :         *dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
    5830              :     }
    5831       342400 :   if (!def_stmt_info
    5832      1924302 :       || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
    5833      3595779 :       || !vect_use_mask_type_p (def_stmt_info))
    5834       777075 :     return NULL_TREE;
    5835              : 
    5836      1236802 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5837              : }
    5838              : 
    5839              : /* Function vect_recog_gcond_pattern
    5840              : 
    5841              :    Try to find a pattern like the following:
    5842              : 
    5843              :      if (a op b)
    5844              : 
    5845              :    where operator 'op' is not != and convert it to an adjusted boolean pattern
    5846              : 
    5847              :      mask = a op b
    5848              :      if (mask != 0)
    5849              : 
    5850              :    and set the mask type on MASK.
    5851              : 
    5852              :    Input:
    5853              : 
    5854              :    * STMT_VINFO: The stmt at the end from which the pattern
    5855              :                  search begins, i.e. the gcond whose
    5856              :                  comparison is to be rewritten.
    5857              : 
    5858              :    Output:
    5859              : 
    5860              :    * TYPE_OUT: The type of the output of this pattern.
    5861              : 
    5862              :    * Return value: A new stmt that will be used to replace the pattern.  */
    5863              : 
    5864              : static gimple *
    5865     31119803 : vect_recog_gcond_pattern (vec_info *vinfo,
    5866              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5867              : {
    5868              :   /* Currently we only support this for loop vectorization and when multiple
    5869              :      exits.  */
    5870     31119803 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5871      4496499 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5872              :     return NULL;
    5873              : 
    5874      1643304 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5875      1643304 :   gcond* cond = NULL;
    5876     31139203 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5877              :     return NULL;
    5878              : 
    5879       382352 :   auto lhs = gimple_cond_lhs (cond);
    5880       382352 :   auto rhs = gimple_cond_rhs (cond);
    5881       382352 :   auto code = gimple_cond_code (cond);
    5882              : 
    5883       382352 :   tree scalar_type = TREE_TYPE (lhs);
    5884       382352 :   if (VECTOR_TYPE_P (scalar_type))
    5885              :     return NULL;
    5886              : 
    5887              :   /* If the input is a boolean then try to figure out the precision that the
    5888              :      vector type should use.  We cannot use the scalar precision as this would
    5889              :      later mismatch.  This is similar to what recog_bool does.  */
    5890       382352 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5891              :     {
    5892        10249 :       if (tree stype = integer_type_for_mask (lhs, vinfo))
    5893       382352 :         scalar_type = stype;
    5894              :     }
    5895              : 
    5896       382352 :   tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
    5897       382352 :   if (vectype == NULL_TREE)
    5898              :     return NULL;
    5899              : 
    5900       362952 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5901       362952 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5902       362952 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5903              : 
    5904       362952 :   gimple *pattern_stmt
    5905       362952 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5906       362952 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5907              :                          NULL_TREE, NULL_TREE);
    5908       362952 :   *type_out = vectype;
    5909       362952 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5910       362952 :   return pattern_stmt;
    5911              : }
    5912              : 
    5913              : 
    5914              : /* A helper for vect_recog_mask_conversion_pattern.  Build
    5915              :    conversion of MASK to a type suitable for masking VECTYPE.
    5916              :    Built statement gets required vectype and is appended to
    5917              :    a pattern sequence of STMT_VINFO.
    5918              : 
    5919              :    Return converted mask.  */
    5920              : 
    5921              : static tree
    5922       126885 : build_mask_conversion (vec_info *vinfo,
    5923              :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5924              : {
    5925       126885 :   gimple *stmt;
    5926       126885 :   tree masktype, tmp;
    5927              : 
    5928       126885 :   masktype = truth_type_for (vectype);
    5929       126885 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5930       126885 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5931       126885 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5932       126885 :                           stmt, masktype, TREE_TYPE (vectype));
    5933              : 
    5934       126885 :   return tmp;
    5935              : }
    5936              : 
    5937              : 
    5938              : /* Return MASK if MASK is suitable for masking an operation on vectors
    5939              :    of type VECTYPE, otherwise convert it into such a form and return
    5940              :    the result.  Associate any conversion statements with STMT_INFO's
    5941              :    pattern.  */
    5942              : 
    5943              : static tree
    5944        73625 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    5945              :                                stmt_vec_info stmt_info, vec_info *vinfo)
    5946              : {
    5947        73625 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    5948        73625 :   if (mask_type)
    5949              :     {
    5950        73625 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    5951        73625 :       if (mask_vectype
    5952       147250 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    5953        87965 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    5954        59285 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    5955              :     }
    5956        73625 :   return mask;
    5957              : }
    5958              : 
    5959              : 
    5960              : /* Function vect_recog_bool_pattern
    5961              : 
    5962              :    Try to find pattern like following:
    5963              : 
    5964              :      bool a_b, b_b, c_b, d_b, e_b;
    5965              :      TYPE f_T;
    5966              :    loop:
    5967              :      S1  a_b = x1 CMP1 y1;
    5968              :      S2  b_b = x2 CMP2 y2;
    5969              :      S3  c_b = a_b & b_b;
    5970              :      S4  d_b = x3 CMP3 y3;
    5971              :      S5  e_b = c_b | d_b;
    5972              :      S6  f_T = (TYPE) e_b;
    5973              : 
    5974              :    where type 'TYPE' is an integral type.  Or a similar pattern
    5975              :    ending in
    5976              : 
    5977              :      S6  f_Y = e_b ? r_Y : s_Y;
    5978              : 
    5979              :    as results from if-conversion of a complex condition.
    5980              : 
    5981              :    Input:
    5982              : 
    5983              :    * STMT_VINFO: The stmt at the end from which the pattern
    5984              :                  search begins, i.e. cast of a bool to
    5985              :                  an integer type.
    5986              : 
    5987              :    Output:
    5988              : 
    5989              :    * TYPE_OUT: The type of the output of this pattern.
    5990              : 
    5991              :    * Return value: A new stmt that will be used to replace the pattern.
    5992              : 
    5993              :         Assuming size of TYPE is the same as size of all comparisons
    5994              :         (otherwise some casts would be added where needed), for the
    5995              :         above sequence we create related pattern stmts:
    5996              :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5997              :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5998              :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5999              :         S5'  e_T = c_T | d_T;
    6000              :         S6'  f_T = e_T;
    6001              : 
    6002              :         Instead of the above S3' we could emit:
    6003              :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    6004              :         S3'  c_T = a_T & b_T;
    6005              :         but the above is more efficient.  */
    6006              : 
    6007              : static gimple *
    6008     31119803 : vect_recog_bool_pattern (vec_info *vinfo,
    6009              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    6010              : {
    6011     31119803 :   gimple *last_stmt = stmt_vinfo->stmt;
    6012     31119803 :   enum tree_code rhs_code;
    6013     31119803 :   tree var, lhs, rhs, vectype;
    6014     31119803 :   gimple *pattern_stmt;
    6015              : 
    6016     31119803 :   if (!is_gimple_assign (last_stmt))
    6017              :     return NULL;
    6018              : 
    6019     21759141 :   var = gimple_assign_rhs1 (last_stmt);
    6020     21759141 :   lhs = gimple_assign_lhs (last_stmt);
    6021     21759141 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6022              : 
    6023     21759141 :   if (rhs_code == VIEW_CONVERT_EXPR)
    6024       192005 :     var = TREE_OPERAND (var, 0);
    6025              : 
    6026     21759141 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    6027              :     return NULL;
    6028              : 
    6029       733889 :   hash_set<gimple *> bool_stmts;
    6030              : 
    6031       733889 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    6032              :       || rhs_code == VIEW_CONVERT_EXPR
    6033              :       || rhs_code == FLOAT_EXPR)
    6034              :     {
    6035       174885 :       if (! (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    6036         2111 :              || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    6037       173308 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6038              :         return NULL;
    6039        82075 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6040              : 
    6041        82075 :       tree type = integer_type_for_mask (var, vinfo);
    6042        82075 :       tree cst0, cst1, tmp;
    6043              : 
    6044        82075 :       if (!type)
    6045              :         return NULL;
    6046              : 
    6047              :       /* We may directly use cond with narrowed type to avoid multiple cond
    6048              :          exprs with following result packing and perform single cond with
    6049              :          packed mask instead.  In case of widening we better make cond first
    6050              :          and then extract results.  */
    6051        42319 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    6052        29542 :         type = TREE_TYPE (lhs);
    6053              : 
    6054        42319 :       cst0 = build_int_cst (type, 0);
    6055        42319 :       cst1 = build_int_cst (type, 1);
    6056        42319 :       tmp = vect_recog_temp_ssa_var (type, NULL);
    6057        42319 :       pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    6058              : 
    6059        42319 :       if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    6060              :         {
    6061        12777 :           tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    6062        12777 :           append_pattern_def_seq (vinfo, stmt_vinfo,
    6063              :                                   pattern_stmt, new_vectype);
    6064              : 
    6065        12777 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6066        12777 :           pattern_stmt
    6067        25240 :             = gimple_build_assign (lhs, (rhs_code == FLOAT_EXPR
    6068              :                                          ? FLOAT_EXPR : CONVERT_EXPR), tmp);
    6069              :         }
    6070              : 
    6071        42319 :       *type_out = vectype;
    6072        42319 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6073              : 
    6074        42319 :       return pattern_stmt;
    6075              :     }
    6076              :   else if (rhs_code == COND_EXPR
    6077       213186 :            && TREE_CODE (var) == SSA_NAME)
    6078              :     {
    6079       213186 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6080       213186 :       if (vectype == NULL_TREE)
    6081              :         return NULL;
    6082              : 
    6083              :       /* Build a scalar type for the boolean result that when
    6084              :          vectorized matches the vector type of the result in
    6085              :          size and number of elements.  */
    6086       198499 :       unsigned prec
    6087       198499 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    6088              :                                TYPE_VECTOR_SUBPARTS (vectype));
    6089              : 
    6090       198499 :       tree type
    6091       396998 :         = build_nonstandard_integer_type (prec,
    6092       198499 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    6093       198499 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    6094              :         return NULL;
    6095              : 
    6096       198499 :       enum vect_def_type dt;
    6097       198499 :       if (integer_type_for_mask (var, vinfo))
    6098              :         return NULL;
    6099        36562 :       else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
    6100        36562 :                && vect_is_simple_use (var, vinfo, &dt)
    6101        36562 :                && (dt == vect_external_def
    6102        36555 :                    || dt == vect_constant_def))
    6103              :         {
    6104              :           /* If the condition is already a boolean then manually convert it to a
    6105              :              mask of the given integer type but don't set a vectype.  */
    6106         1383 :           tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
    6107         1383 :           pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
    6108              :                                               build_all_ones_cst (type),
    6109              :                                               build_zero_cst (type));
    6110         1383 :           append_inv_pattern_def_seq (vinfo, pattern_stmt);
    6111         1383 :           var = lhs_ivar;
    6112              :         }
    6113              : 
    6114        36562 :       tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6115        36562 :       pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
    6116        36562 :                                           build_zero_cst (TREE_TYPE (var)));
    6117              : 
    6118        36562 :       tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
    6119        36562 :       if (!new_vectype)
    6120              :         return NULL;
    6121              : 
    6122        36562 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
    6123        36562 :                               TREE_TYPE (var));
    6124              : 
    6125        36562 :       lhs_var = vect_convert_mask_for_vectype (lhs_var, vectype, stmt_vinfo,
    6126              :                                                vinfo);
    6127              : 
    6128        36562 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6129        36562 :       pattern_stmt
    6130        36562 :         = gimple_build_assign (lhs, COND_EXPR, lhs_var,
    6131              :                                gimple_assign_rhs2 (last_stmt),
    6132              :                                gimple_assign_rhs3 (last_stmt));
    6133        36562 :       *type_out = vectype;
    6134        36562 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6135              : 
    6136        36562 :       return pattern_stmt;
    6137              :     }
    6138       432891 :   else if (rhs_code == BIT_NOT_EXPR
    6139       432891 :            && !vect_use_mask_type_p (stmt_vinfo))
    6140              :     {
    6141              :       /* When we have a bool data inversion rewrite that to an XOR to
    6142              :          cope with the fact that we'll use a wider vector element type.  */
    6143         8724 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6144         8724 :       pattern_stmt
    6145         8724 :         = gimple_build_assign (lhs, BIT_XOR_EXPR, var,
    6146         8724 :                                build_all_ones_cst (TREE_TYPE (var)));
    6147         8724 :       *type_out = NULL_TREE;
    6148         8724 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6149              : 
    6150         8724 :       return pattern_stmt;
    6151              :     }
    6152       424167 :   else if ((rhs_code == BIT_XOR_EXPR
    6153              :             || rhs_code == BIT_AND_EXPR
    6154       424167 :             || rhs_code == BIT_IOR_EXPR)
    6155       331236 :            && TREE_CODE (var) == SSA_NAME)
    6156              :     {
    6157       331236 :       tree rhs2 = gimple_assign_rhs2 (last_stmt);
    6158       331236 :       if (TREE_CODE (rhs2) != SSA_NAME)
    6159              :         return NULL;
    6160       331236 :       tree lhs_type = integer_type_for_mask (lhs, vinfo);
    6161       331236 :       if (!lhs_type)
    6162              :         return NULL;
    6163       171200 :       vectype = get_mask_type_for_scalar_type (vinfo, lhs_type);
    6164       171200 :       if (!vectype)
    6165              :         return NULL;
    6166       171200 :       vect_def_type dt1, dt2;
    6167       171200 :       tree rhs1_type = integer_type_for_mask (var, vinfo, &dt1);
    6168       171200 :       tree rhs2_type = integer_type_for_mask (rhs2, vinfo, &dt2);
    6169       171200 :       if ((rhs1_type || dt1 == vect_external_def)
    6170       157759 :           && (rhs2_type || dt2 == vect_external_def))
    6171              :         return NULL;
    6172              :       /* When one input is a mask and the other is not create a pattern
    6173              :          stmt sequence that creates a mask for the non-mask input and
    6174              :          convert it to one suitable for the output mask used.  */
    6175        32630 :       if (rhs1_type && !rhs2_type)
    6176              :         {
    6177        19189 :           tree rhs1_vectype = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6178        19189 :           if (!rhs1_vectype)
    6179              :             return NULL;
    6180        19189 :           tree rhs2_vectype = get_vectype_for_scalar_type (vinfo,
    6181        19189 :                                                            TREE_TYPE (rhs2));
    6182        19189 :           if (!rhs2_vectype)
    6183              :             return NULL;
    6184        19189 :           tree new_vectype = truth_type_for (rhs2_vectype);
    6185        19189 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6186        19189 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, rhs2,
    6187              :                                               build_zero_cst
    6188        19189 :                                                 (TREE_TYPE (rhs2)));
    6189        19189 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6190        19189 :                                   new_vectype, TREE_TYPE (new_vectype));
    6191        19189 :           rhs2 = vect_convert_mask_for_vectype (tem, rhs1_vectype,
    6192              :                                                 stmt_vinfo, vinfo);
    6193              :         }
    6194        13441 :       else if (!rhs1_type && rhs2_type)
    6195              :         {
    6196        13441 :           tree rhs2_vectype = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6197        13441 :           if (!rhs2_vectype)
    6198              :             return NULL;
    6199        13441 :           tree rhs1_vectype = get_vectype_for_scalar_type (vinfo,
    6200        13441 :                                                            TREE_TYPE (var));
    6201        13441 :           if (!rhs1_vectype)
    6202              :             return NULL;
    6203        13441 :           tree new_vectype = truth_type_for (rhs1_vectype);
    6204        13441 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6205        13441 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, var,
    6206              :                                               build_zero_cst
    6207        13441 :                                                 (TREE_TYPE (var)));
    6208        13441 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6209        13441 :                                   new_vectype, TREE_TYPE (new_vectype));
    6210        13441 :           var = vect_convert_mask_for_vectype (tem, rhs2_vectype,
    6211              :                                                stmt_vinfo, vinfo);
    6212              :         }
    6213        32630 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6214        32630 :       pattern_stmt = gimple_build_assign (lhs, rhs_code, var, rhs2);
    6215        32630 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6216        32630 :       *type_out = vectype;
    6217        32630 :       return pattern_stmt;
    6218              :     }
    6219        92931 :   else if (rhs_code == SSA_NAME
    6220        26193 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    6221              :     {
    6222         7807 :       stmt_vec_info pattern_stmt_info;
    6223         7807 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6224         7807 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    6225            0 :         return NULL;
    6226              : 
    6227         7807 :       tree type = integer_type_for_mask (var, vinfo);
    6228         7807 :       if (!type)
    6229              :         return NULL;
    6230              : 
    6231         4433 :       var = vect_convert_mask_for_vectype (var, vectype, stmt_vinfo, vinfo);
    6232              : 
    6233         4433 :       tree cst0 = build_int_cst (TREE_TYPE (vectype), 0);
    6234         4433 :       tree cst1 = build_int_cst (TREE_TYPE (vectype), 1);
    6235         4433 :       rhs = vect_recog_temp_ssa_var (TREE_TYPE (vectype), NULL);
    6236         4433 :       pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    6237         4433 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype);
    6238              : 
    6239         4433 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    6240         4433 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    6241         4433 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6242         4433 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6243         4433 :       *type_out = vectype;
    6244         4433 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6245              : 
    6246         4433 :       return pattern_stmt;
    6247              :     }
    6248              :   else
    6249              :     return NULL;
    6250       733889 : }
    6251              : 
    6252              : 
    6253              : /* Function vect_recog_mask_conversion_pattern
    6254              : 
    6255              :    Try to find statements which require boolean type
    6256              :    conversion.  Additional conversion statements are
    6257              :    added to handle such cases.  For example:
    6258              : 
    6259              :    bool m_1, m_2, m_3;
    6260              :    int i_4, i_5;
    6261              :    double d_6, d_7;
    6262              :    char c_1, c_2, c_3;
    6263              : 
    6264              :    S1   m_1 = i_4 > i_5;
    6265              :    S2   m_2 = d_6 < d_7;
    6266              :    S3   m_3 = m_1 & m_2;
    6267              :    S4   c_1 = m_3 ? c_2 : c_3;
    6268              : 
    6269              :    Will be transformed into:
    6270              : 
    6271              :    S1   m_1 = i_4 > i_5;
    6272              :    S2   m_2 = d_6 < d_7;
    6273              :    S3'' m_2' = (_Bool[bitsize=32])m_2
    6274              :    S3'  m_3' = m_1 & m_2';
    6275              :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    6276              :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    6277              : 
    6278              : static gimple *
    6279     31140951 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    6280              :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    6281              : {
    6282     31140951 :   gimple *last_stmt = stmt_vinfo->stmt;
    6283     31140951 :   enum tree_code rhs_code;
    6284     31140951 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    6285     31140951 :   tree vectype1, vectype2;
    6286     31140951 :   stmt_vec_info pattern_stmt_info;
    6287              : 
    6288              :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    6289              :      conversion.  */
    6290     31140951 :   if (is_gimple_call (last_stmt)
    6291     31140951 :       && gimple_call_internal_p (last_stmt))
    6292              :     {
    6293       115050 :       gcall *pattern_stmt;
    6294              : 
    6295       115050 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    6296       115050 :       int mask_argno = internal_fn_mask_index (ifn);
    6297       115050 :       if (mask_argno < 0)
    6298              :         return NULL;
    6299              : 
    6300        15443 :       bool store_p = internal_store_fn_p (ifn);
    6301        15443 :       bool load_p = internal_store_fn_p (ifn);
    6302        15443 :       if (store_p)
    6303              :         {
    6304         2660 :           int rhs_index = internal_fn_stored_value_index (ifn);
    6305         2660 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    6306         2660 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    6307              :         }
    6308              :       else
    6309              :         {
    6310        12783 :           lhs = gimple_call_lhs (last_stmt);
    6311        12783 :           if (!lhs)
    6312              :             return NULL;
    6313        12783 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6314              :         }
    6315              : 
    6316        15443 :       if (!vectype1)
    6317              :         return NULL;
    6318              : 
    6319        15163 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    6320        15163 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    6321        15163 :       if (mask_arg_type)
    6322              :         {
    6323        13421 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    6324              : 
    6325        13421 :           if (!vectype2
    6326        13421 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6327              :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    6328         8564 :             return NULL;
    6329              :         }
    6330         1742 :       else if (store_p || load_p)
    6331              :         return NULL;
    6332              : 
    6333         6282 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    6334              : 
    6335         6282 :       auto_vec<tree, 8> args;
    6336         6282 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    6337         6282 :       args.safe_grow (nargs, true);
    6338        31410 :       for (unsigned int i = 0; i < nargs; ++i)
    6339        25128 :         args[i] = ((int) i == mask_argno
    6340        25128 :                    ? tmp
    6341        18846 :                    : gimple_call_arg (last_stmt, i));
    6342         6282 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    6343              : 
    6344         6282 :       if (!store_p)
    6345              :         {
    6346         6010 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6347         6010 :           gimple_call_set_lhs (pattern_stmt, lhs);
    6348              :         }
    6349              : 
    6350         6010 :       if (load_p || store_p)
    6351          272 :         gimple_call_set_nothrow (pattern_stmt, true);
    6352              : 
    6353         6282 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6354         6282 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    6355         1977 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6356              : 
    6357         6282 :       *type_out = vectype1;
    6358         6282 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6359              : 
    6360         6282 :       return pattern_stmt;
    6361         6282 :     }
    6362              : 
    6363     31025901 :   if (!is_gimple_assign (last_stmt))
    6364              :     return NULL;
    6365              : 
    6366     21780289 :   gimple *pattern_stmt;
    6367     21780289 :   lhs = gimple_assign_lhs (last_stmt);
    6368     21780289 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    6369     21780289 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6370              : 
    6371              :   /* Check for cond expression requiring mask conversion.  */
    6372     21780289 :   if (rhs_code == COND_EXPR)
    6373              :     {
    6374       193983 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6375              : 
    6376       193983 :       gcc_assert (! COMPARISON_CLASS_P (rhs1));
    6377       193983 :       if (TREE_CODE (rhs1) == SSA_NAME)
    6378              :         {
    6379       193983 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6380       193983 :           if (!rhs1_type)
    6381              :             return NULL;
    6382              :         }
    6383              :       else
    6384              :         return NULL;
    6385              : 
    6386       181450 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6387              : 
    6388       181450 :       if (!vectype1 || !vectype2)
    6389              :         return NULL;
    6390              : 
    6391              :       /* Continue only if a conversion is needed, i.e. when the mask
    6392              :          vector type VECTYPE2 might have a different number of elements
    6393              :          than the data vector type VECTYPE1.  RHS1 is always an SSA
    6394              :          name here (embedded comparisons are asserted not to occur
    6395              :          above), so there is no comparison operand that would need
    6396              :          to be replaced first.  */
    6397       179276 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6398              :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6399              :         return NULL;
    6400              : 
    6401        45378 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    6402        90756 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6403        45378 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6404              :       else
    6405              :         tmp = rhs1;
    6406              : 
    6407        45378 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6408        45378 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    6409              :                                           gimple_assign_rhs2 (last_stmt),
    6410              :                                           gimple_assign_rhs3 (last_stmt));
    6411              : 
    6412        45378 :       *type_out = vectype1;
    6413        45378 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6414              : 
    6415        45378 :       return pattern_stmt;
    6416              :     }
    6417              : 
    6418              :   /* Now check for binary boolean operations requiring conversion for
    6419              :      one of operands.  */
    6420     21586306 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6421              :     return NULL;
    6422              : 
    6423      1764027 :   if (rhs_code != BIT_IOR_EXPR
    6424              :       && rhs_code != BIT_XOR_EXPR
    6425      1764027 :       && rhs_code != BIT_AND_EXPR
    6426      1465421 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    6427              :     return NULL;
    6428              : 
    6429      1585843 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    6430              : 
    6431      1585843 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6432      1585843 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    6433              : 
    6434      1585843 :   if (!rhs1_type || !rhs2_type
    6435      1585843 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    6436              :     return NULL;
    6437              : 
    6438        15940 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    6439              :     {
    6440        10381 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6441        10381 :       if (!vectype1)
    6442              :         return NULL;
    6443        10381 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    6444              :     }
    6445              :   else
    6446              :     {
    6447         5559 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6448         5559 :       if (!vectype1)
    6449              :         return NULL;
    6450         5559 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6451              :     }
    6452              : 
    6453        15940 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6454        15940 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    6455              : 
    6456        15940 :   *type_out = vectype1;
    6457        15940 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6458              : 
    6459        15940 :   return pattern_stmt;
    6460              : }
    6461              : 
    6462              : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    6463              :    the boolean condition under which it occurs, otherwise return null.
                      : 
                      :    A gassign is asserted to be a single-rhs assignment and is treated as
                      :    unconditional, so null is returned for it.  For a gcall, the mask is
                      :    the call argument at the index that internal_fn_mask_index gives for
                      :    the call's internal function; that index is used without being checked.
                      :    NOTE(review): presumably every call that reaches here has a mask
                      :    operand -- confirm against the callers.
                      :    Any other kind of statement ends in gcc_unreachable.  */
    6464              : 
    6465              : static tree
    6466       100055 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    6467              : {
    6468       100055 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    6469              :     {
    6470        98582 :       gcc_assert (gimple_assign_single_p (def_assign));
    6471              :       return NULL_TREE;
    6472              :     }
    6473              : 
    6474         1473 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    6475              :     {
    6476         1473 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    6477         1473 :       int mask_index = internal_fn_mask_index (ifn);
    6478         1473 :       return gimple_call_arg (def_call, mask_index);
    6479              :     }
    6480              : 
    6481            0 :   gcc_unreachable ();
    6482              : }
    6483              : 
    6484              : /* Return the equivalent of:
    6485              : 
    6486              :      fold_convert (TYPE, VALUE)
    6487              : 
    6488              :    with the expectation that the operation will be vectorized.
    6489              :    If new statements are needed, add them as pattern statements
    6490              :    to STMT_INFO.
                      : 
                      :    VALUE itself is returned when useless_type_conversion_p reports that
                      :    converting it to TYPE needs no statement.  Otherwise a CONVERT_EXPR
                      :    assignment to a fresh temporary of type TYPE is appended to the
                      :    pattern definition sequence of STMT_INFO, using the vector type that
                      :    get_vectype_for_scalar_type returns for TYPE (passed on unchecked),
                      :    and that temporary is returned.  */
    6491              : 
    6492              : static tree
    6493            0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    6494              :                                 tree type, tree value, stmt_vec_info stmt_info)
    6495              : {
    6496            0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    6497              :     return value;
    6498              : 
    6499            0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    6500            0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    6501            0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,
    6502              :                           get_vectype_for_scalar_type (vinfo, type));
    6503            0 :   return new_value;
    6504              : }
    6505              : 
    6506              : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    6507              :    internal function.  Return the final statement on success and set
    6508              :    *TYPE_OUT to the vector type being loaded or stored.
    6509              : 
    6510              :    This function only handles gathers and scatters that were recognized
    6511              :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).
                      : 
                      :    Null is returned, and *TYPE_OUT is left alone, when VINFO is not a
                      :    loop_vec_info, when STMT_INFO has no data reference or is not marked
                      :    as a gather/scatter, or when vect_check_gather_scatter fails or leaves
                      :    the internal function as IFN_LAST.
                      : 
                      :    The operands of the generated call are, in order: the base, the alias
                      :    pointer, the (possibly converted) offset and the scale, followed by
                      :    a zero of the element type for loads or the stored value for stores,
                      :    then the mask if there is one and, for masked loads only, the else
                      :    value.  The data reference is moved from STMT_INFO to the new
                      :    pattern statement.  */
    6512              : 
    6513              : static gimple *
    6514     31140951 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    6515              :                                    stmt_vec_info stmt_info, tree *type_out)
    6516              : {
    6517              :   /* Currently we only support this for loop vectorization.  */
    6518     35650186 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6519      4509235 :   if (!loop_vinfo)
    6520              :     return NULL;
    6521              : 
    6522              :   /* Make sure that we're looking at a gather load or scatter store.  */
    6523      4509235 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    6524      4509235 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    6525              :     return NULL;
    6526              : 
    6527              :   /* Get the boolean that controls whether the load or store happens.
    6528              :      This is null if the operation is unconditional.  */
    6529       100055 :   tree mask = vect_get_load_store_mask (stmt_info);
    6530              : 
    6531              :   /* DR analysis nailed down the vector type for the access.  */
    6532       100055 :   tree gs_vectype = STMT_VINFO_VECTYPE (stmt_info);
    6533              : 
    6534              :   /* Make sure that the target supports an appropriate internal
    6535              :      function for the gather/scatter operation.  */
    6536       100055 :   gather_scatter_info gs_info;
    6537       100055 :   if (!vect_check_gather_scatter (stmt_info, gs_vectype, loop_vinfo, &gs_info)
    6538       100055 :       || gs_info.ifn == IFN_LAST)
    6539              :     return NULL;
    6540              : 
    6541              :   /* Convert the mask to the right form.  An unconditional access that
                      :      nevertheless uses one of the masked internal functions gets a
                      :      constant -1 mask, built in the element type of the truth type
                      :      for GS_VECTYPE.  */
    6542            0 :   if (mask)
    6543            0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    6544              :                                           loop_vinfo);
    6545            0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6546            0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6547            0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6548            0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6549            0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
    6550              : 
    6551              :   /* Get the invariant base and non-invariant offset, converting the
    6552              :      latter to the same width as the vector elements.  */
    6553            0 :   tree base = gs_info.base;
    6554            0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6555            0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6556              :                                                 gs_info.offset, stmt_info);
    6557              : 
    6558              :   /* Build the new pattern statement.  Loads take 7 operands when masked
                      :      and 5 otherwise; stores take 6 when masked and 5 otherwise.  */
    6559            0 :   tree scale = size_int (gs_info.scale);
    6560            0 :   gcall *pattern_stmt;
    6561              : 
    6562            0 :   if (DR_IS_READ (dr))
    6563              :     {
    6564            0 :       tree zero = build_zero_cst (gs_info.element_type);
    6565            0 :       if (mask != NULL)
    6566              :         {
    6567            0 :           int elsval = MASK_LOAD_ELSE_ZERO;
    6568              : 
    6569            0 :           tree vec_els
    6570            0 :             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
    6571            0 :           pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
    6572              :                                                      gs_info.alias_ptr,
    6573              :                                                      offset, scale, zero, mask,
    6574              :                                                      vec_els);
    6575              :         }
    6576              :       else
    6577            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
    6578              :                                                    gs_info.alias_ptr,
    6579              :                                                    offset, scale, zero);
    6580            0 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
    6581            0 :       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6582            0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6583              :     }
    6584              :   else
    6585              :     {
    6586            0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6587            0 :       if (mask != NULL)
    6588            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
    6589              :                                                    base, gs_info.alias_ptr,
    6590              :                                                    offset, scale, rhs, mask);
    6591              :       else
    6592            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
    6593              :                                                    base, gs_info.alias_ptr,
    6594              :                                                    offset, scale, rhs);
    6595              :     }
    6596            0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6597              : 
    6598              :   /* Copy across relevant vectorization info and associate DR with the
    6599              :      new pattern statement instead of the original statement.  */
    6600            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6601            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6602              : 
    6603            0 :   *type_out = gs_vectype;
    6604            0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6605              : 
    6606            0 :   return pattern_stmt;
    6607              : }
    6608              : 
    6609              : /* Helper function of vect_recog_cond_store_pattern.  Return true if COND_ARG
    6610              :    is defined by a load statement that reads the same data reference as
    6611              :    STORE_VINFO.  Return false if VINFO has no definition for COND_ARG,
                      :    if the defining statement has no data reference or its data reference
                      :    is a write, or if same_data_refs rejects the pair of references.  */
    6612              : 
    6613              : static bool
    6614        35634 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
    6615              :                                   stmt_vec_info store_vinfo, tree cond_arg)
    6616              : {
    6617        35634 :   stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
    6618        35634 :   if (!load_stmt_vinfo
    6619        20650 :       || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
    6620        12429 :       || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
    6621        48063 :       || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
    6622              :                           STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    6623        26253 :     return false;
    6624              : 
    6625              :   return true;
    6626              : }
    6627              : 
    6628              : /* Function vect_recog_cond_store_pattern
    6629              : 
    6630              :    Try to find the following pattern:
    6631              : 
    6632              :    x = *_3;
    6633              :    c = a CMP b;
    6634              :    y = c ? t_20 : x;
    6635              :    *_3 = y;
    6636              : 
    6637              :    where the store of _3 happens on a conditional select on a value loaded
    6638              :    from the same location.  In such case we can elide the initial load if
    6639              :    MASK_STORE is supported and instead only conditionally write out the result.
    6640              : 
    6641              :    The pattern produces for the above:
    6642              : 
    6643              :    c = a CMP b;
    6644              :    .MASK_STORE (_3, c, t_20)
    6645              : 
    6646              :    Input:
    6647              : 
    6648              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    6649              :    example, when this function is called with _3 then the search begins.
    6650              : 
    6651              :    Output:
    6652              : 
    6653              :    * TYPE_OUT: The type of the output of this pattern.
    6654              : 
    6655              :    * Return value: A new stmt that will be used to replace the sequence.
                      : 
                      :    Null is returned when VINFO is not a loop_vec_info, when STMT_VINFO is
                      :    not a store with a data reference, when the stored value is not an
                      :    SSA name defined inside VINFO by a COND_EXPR whose condition is an
                      :    SSA name, when neither arm of that COND_EXPR is a load of the stored
                      :    location, when no usable vector type or masked store exists for the
                      :    stored type, when the reference may be non-addressable, or when the
                      :    load and the store do not share the same virtual use.
                      : 
                      :    If the loaded value is the "then" arm rather than the "else" arm, the
                      :    condition is inverted with an extra pattern statement first.
                      : 
                      :    NOTE(review): vect_pattern_detected is called before the vector type
                      :    and target checks, so it runs even on paths that go on to return
                      :    null -- confirm that this is intended.  */
    6656              : 
    6657              : static gimple *
    6658     31140951 : vect_recog_cond_store_pattern (vec_info *vinfo,
    6659              :                                stmt_vec_info stmt_vinfo, tree *type_out)
    6660              : {
    6661     31140951 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6662      4509235 :   if (!loop_vinfo)
    6663              :     return NULL;
    6664              : 
    6665      4509235 :   gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
    6666              : 
    6667              :   /* Needs to be a gimple store where we have DR info for.  */
    6668      4509235 :   if (!STMT_VINFO_DATA_REF (stmt_vinfo)
    6669      1079655 :       || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
    6670      4891025 :       || !gimple_store_p (store_stmt))
    6671      4129905 :     return NULL;
    6672              : 
    6673       379330 :   tree st_rhs = gimple_assign_rhs1 (store_stmt);
    6674              : 
    6675       379330 :   if (TREE_CODE (st_rhs) != SSA_NAME)
    6676              :     return NULL;
    6677              : 
    6678       295540 :   auto cond_vinfo = vinfo->lookup_def (st_rhs);
    6679              : 
    6680              :   /* If the condition isn't part of the loop then bool recog wouldn't have seen
    6681              :      it and so this transformation may not be valid.  */
    6682       295540 :   if (!cond_vinfo)
    6683              :     return NULL;
    6684              : 
    6685       278189 :   cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
    6686     31405084 :   gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
    6687       344864 :   if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    6688              :     return NULL;
    6689              : 
    6690              :   /* Check if the else value matches the original loaded one.  If it does
                      :      not, try the "then" value instead and remember in INVERT that the
                      :      condition has to be negated.  */
    6691        18878 :   bool invert = false;
    6692        18878 :   tree cmp_ls = gimple_arg (cond_stmt, 0);
    6693        18878 :   if (TREE_CODE (cmp_ls) != SSA_NAME)
    6694              :     return NULL;
    6695              : 
    6696        18878 :   tree cond_arg1 = gimple_arg (cond_stmt, 1);
    6697        18878 :   tree cond_arg2 = gimple_arg (cond_stmt, 2);
    6698              : 
    6699        18878 :   if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
    6700        18878 :       && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
    6701              :                                                       cond_arg1)))
    6702              :     return NULL;
    6703              : 
    6704         9381 :   vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
    6705              : 
    6706         9381 :   tree scalar_type = TREE_TYPE (st_rhs);
    6707         9381 :   if (VECTOR_TYPE_P (scalar_type))
    6708              :     return NULL;
    6709              : 
    6710         9381 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6711         9381 :   if (vectype == NULL_TREE)
    6712              :     return NULL;
    6713              : 
    6714         9381 :   machine_mode mask_mode;
    6715         9381 :   machine_mode vecmode = TYPE_MODE (vectype);
    6716         1850 :   if (!VECTOR_MODE_P (vecmode)
    6717         9381 :       || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
    6718            0 :       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    6719         9381 :       || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    6720         9381 :     return NULL;
    6721              : 
    6722            0 :   tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
    6723            0 :   if (may_be_nonaddressable_p (base))
    6724              :     return NULL;
    6725              : 
    6726              :   /* We need to use the false parameter of the conditional select.  */
    6727            0 :   tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
    6728            0 :   tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
    6729            0 :   gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
    6730              : 
    6731              :   /* This is a rough estimation to check that there aren't any aliasing stores
    6732              :      in between the load and store.  It's a bit strict, but for now it's good
    6733              :      enough.  */
    6734            0 :   if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    6735              :     return NULL;
    6736              : 
    6737              :   /* If we have to invert the condition, i.e. use the true argument rather than
    6738              :      the false argument, we have to negate the mask.  */
    6739            0 :   if (invert)
    6740              :     {
    6741            0 :       tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6742              : 
    6743              :       /* Invert the mask using ^ 1.  */
    6744            0 :       tree itype = TREE_TYPE (cmp_ls);
    6745            0 :       gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
    6746              :                                            build_int_cst (itype, 1));
    6747              : 
    6748            0 :       tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
    6749            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
    6750            0 :       cmp_ls= var;
    6751              :     }
    6752              : 
    6753            0 :   if (TREE_CODE (base) != MEM_REF)
    6754            0 :    base = build_fold_addr_expr (base);
    6755              : 
    6756            0 :   tree ptr = build_int_cst (reference_alias_ptr_type (base),
    6757            0 :                             get_object_alignment (base));
    6758              : 
    6759              :   /* Convert the mask to the right form.  */
    6760            0 :   tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
    6761              :                                              vinfo);
    6762              : 
    6763            0 :   gcall *call
    6764            0 :     = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
    6765              :                                   cond_store_arg);
    6766            0 :   gimple_set_location (call, gimple_location (store_stmt));
    6767              : 
    6768              :   /* Copy across relevant vectorization info and associate DR with the
    6769              :      new pattern statement instead of the original statement.  */
    6770            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
    6771            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6772              : 
    6773            0 :   *type_out = vectype;
    6774            0 :   return call;
    6775              : }
    6776              : 
    6777              : /* Return true if TYPE is a non-boolean integer type.  These are the types
    6778              :    that we want to consider for narrowing.  Concretely, TYPE must satisfy
                      :    INTEGRAL_TYPE_P and must not satisfy VECT_SCALAR_BOOLEAN_TYPE_P.  */
    6779              : 
    6780              : static bool
    6781     62573606 : vect_narrowable_type_p (tree type)
    6782              : {
    6783     62573606 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6784              : }
    6785              : 
    6786              : /* Return true if the operation given by CODE can be truncated to N bits
    6787              :    when only N bits of the output are needed.  This is only true if bit N+1
    6788              :    of the inputs has no effect on the low N bits of the result.
                      : 
                      :    The codes accepted are negation, addition, subtraction, multiplication,
                      :    bitwise NOT, AND, IOR and XOR, and COND_EXPR.  Every other code
                      :    yields false.  */
    6789              : 
    6790              : static bool
    6791     16052718 : vect_truncatable_operation_p (tree_code code)
    6792              : {
    6793     16052718 :   switch (code)
    6794              :     {
    6795              :     case NEGATE_EXPR:
    6796              :     case PLUS_EXPR:
    6797              :     case MINUS_EXPR:
    6798              :     case MULT_EXPR:
    6799              :     case BIT_NOT_EXPR:
    6800              :     case BIT_AND_EXPR:
    6801              :     case BIT_IOR_EXPR:
    6802              :     case BIT_XOR_EXPR:
    6803              :     case COND_EXPR:
    6804              :       return true;
    6805              : 
    6806      6112558 :     default:
    6807      6112558 :       return false;
    6808              :     }
    6809              : }
    6810              : 
    6811              : /* Record that STMT_INFO could be changed from operating on TYPE to
    6812              :    operating on a type with the precision and sign given by PRECISION
    6813              :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6814              :    it might not be a whole number of bytes.
                      : 
                      :    Nothing is recorded unless the adjusted precision is smaller than the
                      :    precision of TYPE and either no operation precision has been recorded
                      :    for STMT_INFO yet or the recorded one is larger.  When the record is
                      :    updated, the precision and the sign are replaced together.  */
    6815              : 
    6816              : static void
    6817      2549754 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6818              :                          unsigned int precision, signop sign)
    6819              : {
    6820              :   /* Round the precision up to a whole number of bytes.  */
    6821      2549754 :   precision = vect_element_precision (precision);
    6822      2549754 :   if (precision < TYPE_PRECISION (type)
    6823      2549754 :       && (!stmt_info->operation_precision
    6824        40806 :           || stmt_info->operation_precision > precision))
    6825              :     {
    6826      1641872 :       stmt_info->operation_precision = precision;
    6827      1641872 :       stmt_info->operation_sign = sign;
    6828              :     }
    6829      2549754 : }
    6830              : 
    6831              : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6832              :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6833              :    is an arbitrary bit precision; it might not be a whole number of bytes.
                      : 
                      :    The value is first raised to at least STMT_INFO's min_output_precision.
                      :    It is then recorded only if it is smaller than the precision of TYPE
                      :    and either nothing has been recorded yet or the recorded value is
                      :    larger.  */
    6834              : 
    6835              : static void
    6836     11668321 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6837              :                               unsigned int min_input_precision)
    6838              : {
    6839              :   /* This operation in isolation only requires the inputs to have
    6840              :      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
    6841              :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6842              :      as a whole.  E.g. consider something like:
    6843              : 
    6844              :          unsigned short *x, *y;
    6845              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6846              : 
    6847              :      The right shift can be done on unsigned chars, and only requires the
    6848              :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6849              :      approach would mean turning a natural chain of single-vector unsigned
    6850              :      short operations into one that truncates "*x" and then extends
    6851              :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6852              :      operation and one vector for each unsigned char operation.
    6853              :      This would be a significant pessimization.
    6854              : 
    6855              :      Instead only propagate the maximum of this precision and the precision
    6856              :      required by the users of the result.  This means that we don't pessimize
    6857              :      the case above but continue to optimize things like:
    6858              : 
    6859              :          unsigned char *y;
    6860              :          unsigned short *x;
    6861              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6862              : 
    6863              :      Here we would truncate two vectors of *x to a single vector of
    6864              :      unsigned chars and use single-vector unsigned char operations for
    6865              :      everything else, rather than doing two unsigned short copies of
    6866              :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6867     11668321 :   min_input_precision = MAX (min_input_precision,
    6868              :                              stmt_info->min_output_precision);
    6869              : 
    6870     11668321 :   if (min_input_precision < TYPE_PRECISION (type)
    6871     11668321 :       && (!stmt_info->min_input_precision
    6872        63092 :           || stmt_info->min_input_precision > min_input_precision))
    6873       572291 :     stmt_info->min_input_precision = min_input_precision;
    6874     11668321 : }
    6875              : 
    6876              : /* Subroutine of vect_determine_min_output_precision.  Return true if
    6877              :    we can calculate a reduced number of output bits for STMT_INFO,
    6878              :    whose result is LHS.
                      : 
                      :    On success, STMT_INFO's min_output_precision is set to the largest
                      :    min_input_precision found among the non-debug users of LHS; this is 0
                      :    if LHS has no such users.  False is returned, without writing to
                      :    STMT_INFO, as soon as a user is found that VINFO does not know about
                      :    or that has no min_input_precision recorded, or when the COND_EXPR
                      :    check below fails.  */
    6879              : 
    6880              : static bool
    6881     14825216 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6882              :                                        stmt_vec_info stmt_info, tree lhs)
    6883              : {
    6884              :   /* Take the maximum precision required by users of the result.  */
    6885     14825216 :   unsigned int precision = 0;
    6886     14825216 :   imm_use_iterator iter;
    6887     14825216 :   use_operand_p use;
    6888     30604746 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6889              :     {
    6890     15509179 :       gimple *use_stmt = USE_STMT (use);
    6891     15509179 :       if (is_gimple_debug (use_stmt))
    6892       676490 :         continue;
    6893     14832689 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6894     14832689 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6895              :         return false;
    6896              :       /* The input precision recorded for COND_EXPRs applies only to the
    6897              :          "then" and "else" values.
                      :          NOTE(review): ASSIGN is taken from STMT_INFO, the statement that
                      :          defines LHS, whereas USE is an operand of USE_STMT.  Confirm
                      :          whether the COND_EXPR being tested was meant to be the user.  */
    6898       278384 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6899       248333 :       if (assign
    6900       248333 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6901          560 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6902          560 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6903              :         return false;
    6904       956022 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6905     14554865 :     }
    6906              : 
    6907       270351 :   if (dump_enabled_p ())
    6908         5826 :     dump_printf_loc (MSG_NOTE, vect_location,
    6909              :                      "only the low %d bits of %T are significant\n",
    6910              :                      precision, lhs);
    6911       270351 :   stmt_info->min_output_precision = precision;
    6912       270351 :   return true;
    6913              : }
    6914              : 
    6915              : /* Calculate min_output_precision for STMT_INFO.  Statements whose result
                      :    is missing, is not an SSA name or does not have a narrowable type are
                      :    left untouched.  Otherwise the value comes from the users of the
                      :    result when vect_determine_min_output_precision_1 succeeds, and is
                      :    the full precision of the result type when it does not.  */
    6916              : 
    6917              : static void
    6918     37568348 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6919              : {
    6920              :   /* We're only interested in statements with a narrowable result.  */
    6921     37568348 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6922     37568348 :   if (!lhs
    6923     29344733 :       || TREE_CODE (lhs) != SSA_NAME
    6924     62319344 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6925              :     return;
    6926              : 
    6927     14825216 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6928     14554865 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6929              : }
    6930              : 
    6931              : /* Use range information to decide whether STMT (described by STMT_INFO)
    6932              :    could be done in a narrower type.  This is effectively a forward
    6933              :    propagation, since it uses context-independent information that applies
    6934              :    to all users of an SSA name.  */
    6935              : 
    6936              : static void
    6937     21069121 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6938              : {
    6939     21069121 :   tree lhs = gimple_assign_lhs (stmt);
    6940     21069121 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6941     18630712 :     return;
    6942              : 
    6943     16753489 :   tree type = TREE_TYPE (lhs);
    6944     16753489 :   if (!vect_narrowable_type_p (type))
    6945              :     return;
    6946              : 
    6947              :   /* First see whether we have any useful range information for the result.  */
    6948     11412280 :   unsigned int precision = TYPE_PRECISION (type);
    6949     11412280 :   signop sign = TYPE_SIGN (type);
    6950     11412280 :   wide_int min_value, max_value;
    6951     11412280 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6952              :     return;
    6953              : 
    6954      5623591 :   tree_code code = gimple_assign_rhs_code (stmt);
    6955      5623591 :   unsigned int nops = gimple_num_ops (stmt);
    6956              : 
    6957      5623591 :   if (!vect_truncatable_operation_p (code))
    6958              :     {
    6959              :       /* Handle operations that can be computed in type T if all inputs
    6960              :          and outputs can be represented in type T.  Also handle left and
    6961              :          right shifts, where (in addition) the maximum shift amount must
    6962              :          be less than the number of bits in T.  */
    6963      2039230 :       bool is_shift;
    6964      2039230 :       switch (code)
    6965              :         {
    6966              :         case LSHIFT_EXPR:
    6967              :         case RSHIFT_EXPR:
    6968              :           is_shift = true;
    6969              :           break;
    6970              : 
    6971       285142 :         case ABS_EXPR:
    6972       285142 :         case MIN_EXPR:
    6973       285142 :         case MAX_EXPR:
    6974       285142 :         case TRUNC_DIV_EXPR:
    6975       285142 :         case CEIL_DIV_EXPR:
    6976       285142 :         case FLOOR_DIV_EXPR:
    6977       285142 :         case ROUND_DIV_EXPR:
    6978       285142 :         case EXACT_DIV_EXPR:
    6979              :           /* Modulus is excluded because it is typically calculated by doing
    6980              :              a division, for which minimum signed / -1 isn't representable in
    6981              :              the original signed type.  We could take the division range into
    6982              :              account instead, if handling modulus ever becomes important.  */
    6983       285142 :           is_shift = false;
    6984       285142 :           break;
    6985              : 
    6986              :         default:
    6987              :           return;
    6988              :         }
    6989      1362895 :       for (unsigned int i = 1; i < nops; ++i)
    6990              :         {
    6991      1052597 :           tree op = gimple_op (stmt, i);
    6992      1052597 :           wide_int op_min_value, op_max_value;
    6993      1052597 :           if (TREE_CODE (op) == INTEGER_CST)
    6994              :             {
    6995       304569 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6996       304569 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6997              :             }
    6998       748028 :           else if (TREE_CODE (op) == SSA_NAME)
    6999              :             {
    7000       748028 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    7001              :                 return;
    7002              :             }
    7003              :           else
    7004              :             return;
    7005              : 
    7006       691348 :           if (is_shift && i == 2)
    7007              :             {
    7008              :               /* There needs to be one more bit than the maximum shift amount.
    7009              : 
    7010              :                  If the maximum shift amount is already 1 less than PRECISION
    7011              :                  then we can't narrow the shift further.  Dealing with that
    7012              :                  case first ensures that we can safely use an unsigned range
    7013              :                  below.
    7014              : 
    7015              :                  op_min_value isn't relevant, since shifts by negative amounts
    7016              :                  are UB.  */
    7017       205598 :               if (wi::geu_p (op_max_value, precision - 1))
    7018              :                 return;
    7019       182455 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    7020              : 
    7021              :               /* As explained below, we can convert a signed shift into an
    7022              :                  unsigned shift if the sign bit is always clear.  At this
    7023              :                  point we've already processed the ranges of the output and
    7024              :                  the first input.  */
    7025       182455 :               auto op_sign = sign;
    7026       182455 :               if (sign == SIGNED && !wi::neg_p (min_value))
    7027              :                 op_sign = UNSIGNED;
    7028       364910 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    7029       182455 :                                              precision, op_sign);
    7030       364910 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    7031       182455 :                                              precision, op_sign);
    7032              :             }
    7033       668205 :           min_value = wi::min (min_value, op_min_value, sign);
    7034       668205 :           max_value = wi::max (max_value, op_max_value, sign);
    7035      1052597 :         }
    7036              :     }
    7037              : 
    7038              :   /* Try to switch signed types for unsigned types if we can.
    7039              :      This is better for two reasons.  First, unsigned ops tend
    7040              :      to be cheaper than signed ops.  Second, it means that we can
    7041              :      handle things like:
    7042              : 
    7043              :         signed char c;
    7044              :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    7045              : 
    7046              :      as:
    7047              : 
    7048              :         signed char c;
    7049              :         unsigned short res_1 = (unsigned short) c & 0xff00;
    7050              :         int res = (int) res_1;
    7051              : 
    7052              :      where the intermediate result res_1 has unsigned rather than
    7053              :      signed type.  */
    7054      3894659 :   if (sign == SIGNED && !wi::neg_p (min_value))
    7055              :     sign = UNSIGNED;
    7056              : 
    7057              :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    7058      3894659 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    7059      3894659 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    7060      3894659 :   unsigned int value_precision = MAX (precision1, precision2);
    7061      3894659 :   if (value_precision >= precision)
    7062              :     return;
    7063              : 
    7064      2438409 :   if (dump_enabled_p ())
    7065       111159 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7066              :                      " without loss of precision: %G",
    7067              :                      sign == SIGNED ? "signed" : "unsigned",
    7068              :                      value_precision, (gimple *) stmt);
    7069              : 
    7070      2438409 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    7071      2438409 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    7072     11412280 : }
    7073              : 
    7074              : /* Use information about the users of STMT's result to decide whether
    7075              :    STMT (described by STMT_INFO) could be done in a narrower type.
    7076              :    This is effectively a backward propagation.  */
    7077              : 
    7078              : static void
    7079     21069121 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    7080              : {
    7081     21069121 :   tree_code code = gimple_assign_rhs_code (stmt);
    7082     21069121 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    7083     21069121 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    7084     21069121 :   if (!vect_narrowable_type_p (type))
    7085     11839209 :     return;
    7086              : 
    7087     13338490 :   unsigned int precision = TYPE_PRECISION (type);
    7088     13338490 :   unsigned int operation_precision, min_input_precision;
    7089     13338490 :   switch (code)
    7090              :     {
    7091      2430122 :     CASE_CONVERT:
    7092              :       /* Only the bits that contribute to the output matter.  Don't change
    7093              :          the precision of the operation itself.  */
    7094      2430122 :       operation_precision = precision;
    7095      2430122 :       min_input_precision = stmt_info->min_output_precision;
    7096      2430122 :       break;
    7097              : 
    7098       479241 :     case LSHIFT_EXPR:
    7099       479241 :     case RSHIFT_EXPR:
    7100       479241 :       {
    7101       479241 :         tree shift = gimple_assign_rhs2 (stmt);
    7102       479241 :         unsigned int min_const_shift, max_const_shift;
    7103       479241 :         wide_int min_shift, max_shift;
    7104       479241 :         if (TREE_CODE (shift) == SSA_NAME
    7105       105729 :             && vect_get_range_info (shift, &min_shift, &max_shift)
    7106        81506 :             && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
    7107       558002 :             && wi::lt_p (max_shift, TYPE_PRECISION (type),
    7108        78761 :                          TYPE_SIGN (TREE_TYPE (shift))))
    7109              :           {
    7110        70587 :             min_const_shift = min_shift.to_uhwi ();
    7111        70587 :             max_const_shift = max_shift.to_uhwi ();
    7112              :           }
    7113       408654 :         else if (TREE_CODE (shift) == INTEGER_CST
    7114       782166 :                  && wi::ltu_p (wi::to_widest (shift), precision))
    7115       373404 :           min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
    7116              :         else
    7117        35250 :           return;
    7118       443991 :         if (code == LSHIFT_EXPR)
    7119              :           {
    7120              :             /* Avoid creating an undefined shift.
    7121              : 
    7122              :                ??? We could instead use min_output_precision as-is and
    7123              :                optimize out-of-range shifts to zero.  However, only
    7124              :                degenerate testcases shift away all their useful input data,
    7125              :                and it isn't natural to drop input operations in the middle
    7126              :                of vectorization.  This sort of thing should really be
    7127              :                handled before vectorization.  */
    7128       108192 :             operation_precision = MAX (stmt_info->min_output_precision,
    7129              :                                        max_const_shift + 1);
    7130              :             /* We need MIN_CONST_SHIFT fewer bits of the input.  */
    7131       108192 :             min_input_precision = (MAX (operation_precision, max_const_shift)
    7132              :                                    - min_const_shift);
    7133              :           }
    7134              :         else
    7135              :           {
    7136              :             /* We need MAX_CONST_SHIFT extra bits to do the operation.  */
    7137       335799 :             operation_precision = (stmt_info->min_output_precision
    7138              :                                    + max_const_shift);
    7139       335799 :             min_input_precision = operation_precision;
    7140              :           }
    7141       443991 :         break;
    7142       479241 :       }
    7143              : 
    7144     10429127 :     default:
    7145     10429127 :       if (vect_truncatable_operation_p (code))
    7146              :         {
    7147              :           /* Input bit N has no effect on output bits N-1 and lower.  */
    7148      6355799 :           operation_precision = stmt_info->min_output_precision;
    7149      6355799 :           min_input_precision = operation_precision;
    7150      6355799 :           break;
    7151              :         }
    7152              :       return;
    7153              :     }
    7154              : 
    7155      9229912 :   if (operation_precision < precision)
    7156              :     {
    7157       111345 :       if (dump_enabled_p ())
    7158         2786 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7159              :                          " without affecting users: %G",
    7160         2786 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    7161              :                          operation_precision, (gimple *) stmt);
    7162       222690 :       vect_set_operation_type (stmt_info, type, operation_precision,
    7163       111345 :                                TYPE_SIGN (type));
    7164              :     }
    7165      9229912 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    7166              : }
    7167              : 
    7168              : /* Return true if the statement described by STMT_INFO sets a boolean
    7169              :    SSA_NAME and if we know how to vectorize this kind of statement using
    7170              :    vector mask types.  */
    7171              : 
    7172              : static bool
    7173     38655640 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    7174              : {
    7175     38655640 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    7176     38655640 :   tree_code code = ERROR_MARK;
    7177     38655640 :   gassign *assign = NULL;
    7178     38655640 :   gcond *cond = NULL;
    7179              : 
    7180     38655640 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    7181     21845499 :     code = gimple_assign_rhs_code (assign);
    7182     16810141 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    7183              :     {
    7184      5232600 :       lhs = gimple_cond_lhs (cond);
    7185      5232600 :       code = gimple_cond_code (cond);
    7186              :     }
    7187              : 
    7188     38655640 :   if (!lhs
    7189     35570858 :       || TREE_CODE (lhs) != SSA_NAME
    7190     69594247 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    7191              :     return false;
    7192              : 
    7193      2093465 :   if (code != ERROR_MARK)
    7194              :     {
    7195      1842290 :       switch (code)
    7196              :         {
    7197              :         CASE_CONVERT:
    7198              :         case SSA_NAME:
    7199              :         case BIT_NOT_EXPR:
    7200              :         case BIT_IOR_EXPR:
    7201              :         case BIT_XOR_EXPR:
    7202              :         case BIT_AND_EXPR:
    7203              :           return true;
    7204              : 
    7205      1447808 :         default:
    7206      1447808 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    7207              :         }
    7208              :     }
    7209       251175 :   else if (is_a <gphi *> (stmt_info->stmt))
    7210       142623 :     return true;
    7211              :   return false;
    7212              : }
    7213              : 
    7214              : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    7215              :    a vector mask type instead of a normal vector type.  Record the
    7216              :    result in STMT_INFO->mask_precision.  Returns true when the
    7217              :    precision changed.  */
    7218              : 
    7219              : static bool
    7220     38655640 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    7221              : {
    7222     38655640 :   if (!possible_vector_mask_operation_p (stmt_info))
    7223              :     return false;
    7224              : 
    7225              :   /* If at least one boolean input uses a vector mask type,
    7226              :      pick the mask type with the narrowest elements.
    7227              : 
    7228              :      ??? This is the traditional behavior.  It should always produce
    7229              :      the smallest number of operations, but isn't necessarily the
    7230              :      optimal choice.  For example, if we have:
    7231              : 
    7232              :        a = b & c
    7233              : 
    7234              :      where:
    7235              : 
    7236              :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    7237              :        - b also uses M16
    7238              :        - c uses a mask type for 8-bit elements (M8)
    7239              : 
    7240              :      then picking M8 gives:
    7241              : 
    7242              :        - 1 M16->M8 pack for b
    7243              :        - 1 M8 AND for a
    7244              :        - 2 M8->M16 unpacks for the user of a
    7245              : 
    7246              :      whereas picking M16 would have given:
    7247              : 
    7248              :        - 2 M8->M16 unpacks for c
    7249              :        - 2 M16 ANDs for a
    7250              : 
    7251              :      The number of operations is equal, but M16 would have given
    7252              :      a shorter dependency chain and allowed more ILP.  */
    7253      1934415 :   unsigned int precision = ~0U;
    7254      1934415 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7255              : 
    7256              :   /* If the statement compares two values that shouldn't use vector masks,
    7257              :      try comparing the values as normal scalars instead.  */
    7258      1934415 :   tree_code code = ERROR_MARK;
    7259      1934415 :   tree op0_type;
    7260      1934415 :   unsigned int nops = -1;
    7261      1934415 :   unsigned int ops_start = 0;
    7262              : 
    7263      1934415 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    7264              :     {
    7265      1259592 :       code = gimple_assign_rhs_code (assign);
    7266      1259592 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    7267      1259592 :       nops = gimple_num_ops (assign);
    7268      1259592 :       ops_start = 1;
    7269              :     }
    7270       674823 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    7271              :     {
    7272       532200 :       code = gimple_cond_code (cond);
    7273       532200 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    7274       532200 :       nops = 2;
    7275       532200 :       ops_start = 0;
    7276              :     }
    7277              : 
    7278      1791792 :   if (code != ERROR_MARK)
    7279              :     {
    7280      5333175 :       for (unsigned int i = ops_start; i < nops; ++i)
    7281              :         {
    7282      3541383 :           tree rhs = gimple_op (stmt, i);
    7283      3541383 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    7284      1740829 :             continue;
    7285              : 
    7286      1800554 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7287      1800554 :           if (!def_stmt_info)
    7288              :             /* Don't let external or constant operands influence the choice.
    7289              :                We can convert them to whichever vector type we pick.  */
    7290       547140 :             continue;
    7291              : 
    7292      1253414 :           if (def_stmt_info->mask_precision)
    7293              :             {
    7294      1034938 :               if (precision > def_stmt_info->mask_precision)
    7295      3541383 :                 precision = def_stmt_info->mask_precision;
    7296              :             }
    7297              :         }
    7298              : 
    7299      1791792 :       if (precision == ~0U
    7300      1457545 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    7301              :         {
    7302      1262247 :           scalar_mode mode;
    7303      1262247 :           tree vectype, mask_type;
    7304      1262247 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    7305              :               /* Do not allow this to set vinfo->vector_mode, this might
    7306              :                  disrupt the result for the next iteration.  */
    7307      1262247 :               && (vectype = get_related_vectype_for_scalar_type
    7308      1512301 :                                                 (vinfo->vector_mode, op0_type))
    7309      1103827 :               && (mask_type = truth_type_for (vectype))
    7310      1103827 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    7311      1707546 :             precision = GET_MODE_BITSIZE (mode);
    7312              :         }
    7313              :     }
    7314              :   else
    7315              :     {
    7316       142623 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    7317       578308 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    7318              :         {
    7319       435685 :           tree rhs = gimple_phi_arg_def (phi, i);
    7320              : 
    7321       435685 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7322       435685 :           if (!def_stmt_info)
    7323              :             /* Don't let external or constant operands influence the choice.
    7324              :                We can convert them to whichever vector type we pick.  */
    7325       281344 :             continue;
    7326              : 
    7327       154341 :           if (def_stmt_info->mask_precision)
    7328              :             {
    7329       129183 :               if (precision > def_stmt_info->mask_precision)
    7330       435685 :                 precision = def_stmt_info->mask_precision;
    7331              :             }
    7332              :         }
    7333              :     }
    7334              : 
    7335      1934415 :   if (stmt_info->mask_precision != precision)
    7336              :     {
    7337      1806446 :       if (dump_enabled_p ())
    7338              :         {
    7339         8024 :           if (precision == ~0U)
    7340         1882 :             dump_printf_loc (MSG_NOTE, vect_location,
    7341              :                              "using normal nonmask vectors for %G",
    7342              :                              stmt_info->stmt);
    7343              :           else
    7344         6142 :             dump_printf_loc (MSG_NOTE, vect_location,
    7345              :                              "using boolean precision %d for %G",
    7346              :                              precision, stmt_info->stmt);
    7347              :         }
    7348              : 
    7349              :       /* ???  We'd like to assert stmt_info->mask_precision == 0
    7350              :          || stmt_info->mask_precision > precision, i.e. that we only
    7351              :          decrease mask precisions throughout iteration, but the
    7352              :          tcc_comparison handling above means for comparisons of bools
    7353              :          we start with 8 but might increase in case the bools get mask
    7354              :          precision on their own.  */
    7355      1806446 :       stmt_info->mask_precision = precision;
    7356      1806446 :       return true;
    7357              :     }
    7358              :   return false;
    7359              : }
    7360              : 
    7361              : /* Handle vect_determine_precisions for STMT_INFO, given that we
    7362              :    have already done so for the users of its result.  */
    7363              : 
    7364              : void
    7365     37568348 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    7366              : {
    7367     37568348 :   vect_determine_min_output_precision (vinfo, stmt_info);
    7368     37568348 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    7369              :     {
    7370     21069121 :       vect_determine_precisions_from_range (stmt_info, stmt);
    7371     21069121 :       vect_determine_precisions_from_users (stmt_info, stmt);
    7372              :     }
    7373     37568348 : }
    7374              : 
    7375              : /* Walk backwards through the vectorizable region to determine the
    7376              :    values of these fields:
    7377              : 
    7378              :    - min_output_precision
    7379              :    - min_input_precision
    7380              :    - operation_precision
    7381              :    - operation_sign.  Before that, a forward walk sets mask_precision.  */
    7382              : 
    7383              : void
    7384      1053685 : vect_determine_precisions (vec_info *vinfo)
    7385              : {
    7386      1053685 :   basic_block *bbs = vinfo->bbs;
    7387      1053685 :   unsigned int nbbs = vinfo->nbbs;
    7388              : 
    7389      1073160 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    7390              : 
    7391              :   /* For mask precisions we have to iterate since otherwise we do not
    7392              :      get reduction PHI precision correct.  For now do this only for
    7393              :      loop vectorization.  */
    7394      1123843 :   bool changed;
    7395      1123843 :   do
    7396              :     {
    7397      1123843 :       changed = false;
    7398     12692294 :       for (unsigned int i = 0; i < nbbs; i++)
    7399              :         {
    7400     11568451 :           basic_block bb = bbs[i];
    7401     11568451 :           for (auto gsi = gsi_start_phis (bb);
    7402     18728222 :                !gsi_end_p (gsi); gsi_next (&gsi))
    7403              :             {
    7404      7159771 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7405      7159771 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7406      6979014 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7407              :             }
    7408    120238589 :           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7409              :             {
    7410     97101687 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7411     97101687 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7412     31676626 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7413              :             }
    7414              :         }
    7415              :     }
    7416      2177528 :   while (changed && is_a <loop_vec_info> (vinfo));
    7417              : 
    7418     12460559 :   for (unsigned int i = 0; i < nbbs; i++)
    7419              :     {
    7420     11406874 :       basic_block bb = bbs[nbbs - i - 1];
    7421    213725296 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    7422              :         {
    7423     95455774 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7424     95455774 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7425     30792984 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7426              :         }
    7427     18362995 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7428              :         {
    7429      6956121 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7430      6956121 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7431      6775364 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7432              :         }
    7433              :     }
    7434      1053685 : }
    7435              : 
    7436              : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    7437              : 
    7438              : struct vect_recog_func
    7439              : {
    7440              :   vect_recog_func_ptr fn;
    7441              :   const char *name;
    7442              : };
    7443              : 
    7444              : /* Note that ordering matters - the first pattern matching on a stmt is
    7445              :    taken which means usually the more complex one needs to precede the
    7446              :    less complex ones (widen_sum only after dot_prod or sad for example).  */
    7447              : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    7448              :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    7449              :   { vect_recog_bit_insert_pattern, "bit_insert" },
    7450              :   { vect_recog_abd_pattern, "abd" },
    7451              :   { vect_recog_over_widening_pattern, "over_widening" },
    7452              :   /* Must come after over_widening, which narrows the shift as much as
    7453              :      possible beforehand.  */
    7454              :   { vect_recog_average_pattern, "average" },
    7455              :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    7456              :   { vect_recog_mulhs_pattern, "mult_high" },
    7457              :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    7458              :   { vect_recog_widen_mult_pattern, "widen_mult" },
    7459              :   { vect_recog_dot_prod_pattern, "dot_prod" },
    7460              :   { vect_recog_sad_pattern, "sad" },
    7461              :   { vect_recog_widen_sum_pattern, "widen_sum" },
    7462              :   { vect_recog_pow_pattern, "pow" },
    7463              :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    7464              :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    7465              :   { vect_recog_widen_shift_pattern, "widen_shift" },
    7466              :   { vect_recog_rotate_pattern, "rotate" },
    7467              :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    7468              :   { vect_recog_divmod_pattern, "divmod" },
    7469              :   { vect_recog_mod_var_pattern, "modvar" },
    7470              :   { vect_recog_mult_pattern, "mult" },
    7471              :   { vect_recog_sat_add_pattern, "sat_add" },
    7472              :   { vect_recog_sat_sub_pattern, "sat_sub" },
    7473              :   { vect_recog_sat_trunc_pattern, "sat_trunc" },
    7474              :   { vect_recog_gcond_pattern, "gcond" },
    7475              :   { vect_recog_bool_pattern, "bool" },
    7476              :   /* This must come before mask conversion, and includes the parts
    7477              :      of mask conversion that are needed for gather and scatter
    7478              :      internal functions.  */
    7479              :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    7480              :   { vect_recog_cond_store_pattern, "cond_store" },
    7481              :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    7482              :   { vect_recog_widen_plus_pattern, "widen_plus" },
    7483              :   { vect_recog_widen_minus_pattern, "widen_minus" },
    7484              :   { vect_recog_widen_abd_pattern, "widen_abd" },
    7485              :   /* These must come after the double widening ones.  */
    7486              : };
    7487              : 
    7488              : /* Mark statements that are involved in a pattern.
                      :
                      :    PATTERN_STMT is the main pattern statement that replaces
                      :    ORIG_STMT_INFO.  Any statements already queued in ORIG_STMT_INFO's
                      :    STMT_VINFO_PATTERN_DEF_SEQ are registered as extra pattern
                      :    statements and given def type vect_internal_def.  PATTERN_VECTYPE
                      :    is forwarded to vect_init_pattern_stmt / vect_set_pattern_stmt.
                      :
                      :    If ORIG_STMT_INFO is itself a pattern statement, the new statements
                      :    are spliced into the containing pattern definition sequence in
                      :    front of it and the old statement is removed from that sequence.
                      :
                      :    If PATTERN_STMT is a gcond, the statement related to the original
                      :    is given def type vect_condition_def.  If the incoming statement
                      :    has a reduction index (STMT_VINFO_REDUC_IDX != -1), a reduction
                      :    index is also recorded on the new pattern statements in which the
                      :    reduction operand is found.  */
    7489              : 
    7490              : void
    7491      1014296 : vect_mark_pattern_stmts (vec_info *vinfo,
    7492              :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    7493              :                          tree pattern_vectype)
    7494              : {
    7495      1014296 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    7496      1014296 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7497              : 
    7498      1014296 :   gimple *orig_pattern_stmt = NULL;
    7499      1014296 :   if (is_pattern_stmt_p (orig_stmt_info))
    7500              :     {
    7501              :       /* We're replacing a statement in an existing pattern definition
    7502              :          sequence.  */
    7503        11204 :       orig_pattern_stmt = orig_stmt_info->stmt;
    7504        11204 :       if (dump_enabled_p ())
    7505          664 :         dump_printf_loc (MSG_NOTE, vect_location,
    7506              :                          "replacing earlier pattern %G", orig_pattern_stmt);
    7507              : 
    7508              :       /* To keep the book-keeping simple, just swap the lhs of the
    7509              :          old and new statements, so that the old one has a valid but
    7510              :          unused lhs.  */
    7511        11204 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    7512        11204 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    7513        11204 :       gimple_set_lhs (pattern_stmt, old_lhs);
    7514              : 
    7515        11204 :       if (dump_enabled_p ())
    7516          664 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    7517              : 
    7518              :       /* Switch to the statement that ORIG replaces.  */
    7519        11204 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    7520              : 
    7521              :       /* We shouldn't be replacing the main pattern statement.  */
    7522        11204 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    7523              :                   != orig_pattern_stmt);
    7524              :     }
    7525              : 
    7526      1014296 :   if (def_seq)
    7527              :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    7528      2235560 :          !gsi_end_p (si); gsi_next (&si))
    7529              :       {
    7530      1351109 :         if (dump_enabled_p ())
    7531        24652 :           dump_printf_loc (MSG_NOTE, vect_location,
    7532              :                            "extra pattern stmt: %G", gsi_stmt (si));
    7533      1351109 :         stmt_vec_info pattern_stmt_info
    7534      1351109 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    7535              :                                     orig_stmt_info, pattern_vectype);
    7536              :         /* Stmts in the def sequence are not vectorizable cycle or
    7537              :            induction defs, instead they should all be vect_internal_def
    7538              :            feeding the main pattern stmt which retains this def type.  */
    7539      1351109 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    7540              :       }
    7541              : 
    7542      1014296 :   if (orig_pattern_stmt)
    7543              :     {
    7544        11204 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    7545              :                               orig_stmt_info, pattern_vectype);
    7546              : 
    7547              :       /* Insert all the new pattern statements before the original one.  */
    7548        11204 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7549        11204 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    7550              :                                                orig_def_seq);
    7551        11204 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    7552        11204 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    7553              : 
    7554              :       /* Remove the pattern statement that this new pattern replaces.  */
    7555        11204 :       gsi_remove (&gsi, false);
    7556              :     }
    7557              :   else
    7558      1003092 :     vect_set_pattern_stmt (vinfo,
    7559              :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    7560              : 
    7561              :   /* For any conditionals mark them as vect_condition_def.  */
    7562      1014296 :   if (is_a <gcond *> (pattern_stmt))
    7563       363555 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    7564              : 
    7565              :   /* Transfer reduction path info to the pattern.  */
    7566      1014296 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    7567              :     {
    7568        15999 :       gimple_match_op op;
    7569        15999 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    7570            0 :         gcc_unreachable ();
    7571        15999 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    7572              :       /* Search the pattern def sequence and the main pattern stmt.  Note
    7573              :          we may have inserted all into a containing pattern def sequence
    7574              :          so the following is a bit awkward.  S visits the def sequence
                      :          (if any) first and PATTERN_STMT last; whenever LOOKFOR is
                      :          found among the operands of S, LOOKFOR becomes the lhs of S.
                      :          NOTE(review): LOOKFOR above is taken from the operands of
                      :          ORIG_STMT_INFO_SAVED's statement but indexed with the
                      :          reduction index of ORIG_STMT_INFO, which is a different
                      :          statement when an earlier pattern is being replaced --
                      :          confirm this is intended.  */
    7575        15999 :       gimple_stmt_iterator si;
    7576        15999 :       gimple *s;
    7577        15999 :       if (def_seq)
    7578              :         {
    7579        14864 :           si = gsi_start (def_seq);
    7580        14864 :           s = gsi_stmt (si);
    7581        14864 :           gsi_next (&si);
    7582              :         }
    7583              :       else
    7584              :         {
    7585              :           si = gsi_none ();
    7586              :           s = pattern_stmt;
    7587              :         }
    7588        33676 :       do
    7589              :         {
    7590        33676 :           bool found = false;
    7591        33676 :           if (gimple_extract_op (s, &op))
    7592              :             {
    7593        82006 :               for (unsigned i = 0; i < op.num_ops; ++i)
    7594        64329 :                 if (op.ops[i] == lookfor)
    7595              :                   {
    7596        15999 :                     STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7597        15999 :                     lookfor = gimple_get_lhs (s);
    7598        15999 :                     found = true;
    7599        15999 :                     break;
    7600              :                   }
    7601              :               /* Try harder to find a mid-entry into an earlier pattern
    7602              :                  sequence.  Likewise an entry to a stmt skipping a conversion
    7603              :                  on an input.  This means that the initial 'lookfor' was
    7604              :                  bogus.  */
    7605        15999 :               if (!found)
    7606              :                 {
    7607        38361 :                   for (unsigned i = 0; i < op.num_ops; ++i)
    7608        20684 :                     if (TREE_CODE (op.ops[i]) == SSA_NAME)
    7609        17677 :                       if (auto def = vinfo->lookup_def (op.ops[i]))
    7610        17484 :                         if (vect_is_reduction (def)
    7611        17484 :                             || (is_a <gphi *> (def->stmt)
    7612            0 :                                 && STMT_VINFO_REDUC_DEF (def) != NULL))
    7613              :                           {
    7614            0 :                             STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7615            0 :                             lookfor = gimple_get_lhs (s);
    7616            0 :                             found = true;
    7617            0 :                             break;
    7618              :                           }
    7619              :                 }
    7620              :             }
    7621        33676 :           if (s == pattern_stmt)
    7622              :             {
    7623        15999 :               if (!found && dump_enabled_p ())
    7624            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7625              :                                  "failed to update reduction index.\n");
    7626        15999 :               break;
    7627              :             }
    7628        17677 :           if (gsi_end_p (si))
    7629              :             s = pattern_stmt;
    7630              :           else
    7631              :             {
    7632         2813 :               s = gsi_stmt (si);
    7633         2813 :               if (s == pattern_stmt)
    7634              :                 /* Found the end inside a bigger pattern def seq.  */
    7635              :                 si = gsi_none ();
    7636              :               else
    7637         2813 :                 gsi_next (&si);
    7638              :             }
    7639              :         } while (1);
    7640              :     }
    7641      1014296 : }
    7642              : 
    7643              : /* Function vect_pattern_recog_1
    7644              : 
    7645              :    Input:
                      :    VINFO: The vectorizer state, passed on to the recognizer and used
                      :         to look up the stmt_vec_info of pattern definition statements.
    7646              :    RECOG_FUNC: The recognizer to try.  RECOG_FUNC.fn is a pointer to a
    7647              :         function that detects a certain computation pattern and
                      :         RECOG_FUNC.name is the name printed in dumps.
    7648              :    STMT_INFO: A stmt from which the pattern search should start.
    7649              : 
    7650              :    If RECOG_FUNC.fn successfully detected the pattern, it creates
    7651              :    a sequence of statements that has the same functionality and can be
    7652              :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7653              :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7654              :    RECOG_FUNC.fn also sets *TYPE_OUT to the vector type of the final
    7655              :    statement, having first checked that the target supports the new operation
    7656              :    in that type.
    7657              : 
                      :    If STMT_INFO has already been replaced by a pattern, the recognizer
                      :    is instead applied recursively to each statement in STMT_INFO's
                      :    pattern definition sequence.
                      :
    7658              :    This function also does some bookkeeping, as explained in the documentation
    7659              :    for vect_pattern_recog.  */
    7660              : 
    7661              : static void
    7662   1003708221 : vect_pattern_recog_1 (vec_info *vinfo,
    7663              :                       const vect_recog_func &recog_func, stmt_vec_info stmt_info)
    7664              : {
    7665   1003708221 :   gimple *pattern_stmt;
    7666   1003708221 :   tree pattern_vectype;
    7667              : 
    7668              :   /* If this statement has already been replaced with pattern statements,
    7669              :      leave the original statement alone, since the first match wins.
    7670              :      Instead try to match against the definition statements that feed
    7671              :      the main pattern statement.  */
    7672   1003708221 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7673              :     {
    7674     12904691 :       gimple_stmt_iterator gsi;
    7675     12904691 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7676     31237424 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7677     18332733 :         vect_pattern_recog_1 (vinfo, recog_func,
    7678              :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7679              :       return;
    7680              :     }
    7681              : 
    7682    990803530 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7683    990803530 :   pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
    7684    990803530 :   if (!pattern_stmt)
    7685              :     {
    7686              :       /* Clear any half-formed pattern definition sequence.  */
    7687    989789234 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7688    989789234 :       return;
    7689              :     }
    7690              : 
    7691              :   /* Found a vectorizable pattern.  */
    7692      1014296 :   if (dump_enabled_p ())
    7693        18908 :     dump_printf_loc (MSG_NOTE, vect_location,
    7694              :                      "%s pattern recognized: %G",
    7695        18908 :                      recog_func.name, pattern_stmt);
    7696              : 
    7697              :   /* Mark the stmts that are involved in the pattern. */
    7698      1014296 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7699              : }
    7700              : 
    7701              : 
    7702              : /* Function vect_pattern_recog
    7703              : 
    7704              :    Input:
    7705              :    VINFO - the vec_info of the region whose basic blocks (VINFO->bbs) are
    7706              :         scanned for computation idioms.
    7707              : 
    7708              :    Output - for each computation idiom that is detected we create a new stmt
    7709              :         that provides the same functionality and that can be vectorized.  We
    7710              :         also record some information in the stmt_vec_info of the relevant
    7711              :         stmts, as explained below:
    7712              : 
    7713              :    At the entry to this function we have the following stmts, with the
    7714              :    following initial value in the STMT_VINFO fields:
    7715              : 
    7716              :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7717              :          S1: a_i = ....                 -       -               -
    7718              :          S2: a_2 = ..use(a_i)..         -       -               -
    7719              :          S3: a_1 = ..use(a_2)..         -       -               -
    7720              :          S4: a_0 = ..use(a_1)..         -       -               -
    7721              :          S5: ... = ..use(a_0)..         -       -               -
    7722              : 
    7723              :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7724              :    represented by a single stmt.  We then:
    7725              :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7726              :      inserted into the code)
    7727              :    - fill in the STMT_VINFO fields as follows:
    7728              : 
    7729              :                                   in_pattern_p  related_stmt    vec_stmt
    7730              :          S1: a_i = ....                 -       -               -
    7731              :          S2: a_2 = ..use(a_i)..         -       -               -
    7732              :          S3: a_1 = ..use(a_2)..         -       -               -
    7733              :          S4: a_0 = ..use(a_1)..         true    S6              -
    7734              :           '---> S6: a_new = ....        -       S4              -
    7735              :          S5: ... = ..use(a_0)..         -       -               -
    7736              : 
    7737              :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7738              :    to each other through the RELATED_STMT field).
    7739              : 
    7740              :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7741              :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7742              :    remain irrelevant unless used by stmts other than S4.
    7743              : 
    7744              :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7745              :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7746              :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7747              :    S4 will be skipped, and S5 will be vectorized as usual:
    7748              : 
    7749              :                                   in_pattern_p  related_stmt    vec_stmt
    7750              :          S1: a_i = ....                 -       -               -
    7751              :          S2: a_2 = ..use(a_i)..         -       -               -
    7752              :          S3: a_1 = ..use(a_2)..         -       -               -
    7753              :        > VS6: va_new = ....             -       -               -
    7754              :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7755              :           '---> S6: a_new = ....        -       S4              VS6
    7756              :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7757              :          S5: ... = ..use(a_0)..         -       -               -
    7758              : 
    7759              :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7760              :    elsewhere), and we'll end up with:
    7761              : 
    7762              :         VS6: va_new = ....
    7763              :         VS5: ... = ..vuse(va_new)..
    7764              : 
    7765              :    In case of more than one pattern statement, e.g., widen-mult with
    7766              :    intermediate type:
    7767              : 
    7768              :      S1  a_t = ;
    7769              :      S2  a_T = (TYPE) a_t;
    7770              :            '--> S3: a_it = (interm_type) a_t;
    7771              :      S4  prod_T = a_T * CONST;
    7772              :            '--> S5: prod_T' = a_it w* CONST;
    7773              : 
    7774              :    there may be other users of a_T outside the pattern.  In that case S2 will
    7775              :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7776              :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7777              :    be recorded in S3.  */
    7778              : 
    7779              : void
    7780      1053685 : vect_pattern_recog (vec_info *vinfo)
    7781              : {
    7782      1053685 :   basic_block *bbs = vinfo->bbs;
    7783      1053685 :   unsigned int nbbs = vinfo->nbbs;
    7784              : 
    7785      1053685 :   vect_determine_precisions (vinfo);
    7786              : 
    7787      1053685 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7788              : 
    7789              :   /* Scan through the stmts in the region, applying the pattern recognition
    7790              :      functions starting at each stmt visited.  Statements without a
                      :      stmt_vec_info, or not marked STMT_VINFO_VECTORIZABLE, are skipped.  */
    7791     12460559 :   for (unsigned i = 0; i < nbbs; i++)
    7792              :     {
    7793     11406874 :       basic_block bb = bbs[i];
    7794              : 
    7795    118269522 :       for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7796              :         {
    7797     95455774 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7798              : 
    7799     95455774 :           if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
    7800     64662790 :             continue;
    7801              : 
    7802              :           /* Scan over all generic vect_recog_xxx_pattern functions, in the
                      :              order in which they are listed in vect_vect_recog_func_ptrs.  */
    7803   1016168472 :           for (const auto &func_ptr : vect_vect_recog_func_ptrs)
    7804    985375488 :             vect_pattern_recog_1 (vinfo, func_ptr,
    7805              :                                   stmt_info);
    7806              :         }
    7807              :     }
    7808              : 
    7809              :   /* After this no more add_stmt calls are allowed.  */
    7810      1053685 :   vinfo->stmt_vec_info_ro = true;
    7811      1053685 : }
    7812              : 
    7813              : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
    7814              :    or internal_fn contained in ch, respectively.
                      :
                      :    LHS is the result of the new statement.  OP0 must not be NULL_TREE.
                      :    OP1 may be NULL_TREE; for an internal function the call is then
                      :    built with one argument instead of two.  CH must hold either a
                      :    tree code or an internal function.  Returns the new statement.  */
    7815              : gimple *
    7816       159427 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7817              : {
    7818       159427 :   gcc_assert (op0 != NULL_TREE);
    7819       159427 :   if (ch.is_tree_code ())
    7820       159427 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7821              : 
    7822            0 :   gcc_assert (ch.is_internal_fn ());
    7823            0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7824              :                                              op1 == NULL_TREE ? 1 : 2,
    7825              :                                              op0, op1);
    7826            0 :   gimple_call_set_lhs (stmt, lhs);
    7827            0 :   return stmt;
    7828              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.