LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.9 % 3128 2780
Test Date: 2026-05-11 19:44:49 Functions: 97.6 % 85 83
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Analysis Utilities for Loop Vectorization.
       2              :    Copyright (C) 2006-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "gimple-iterator.h"
      29              : #include "gimple-fold.h"
      30              : #include "ssa.h"
      31              : #include "expmed.h"
      32              : #include "optabs-tree.h"
      33              : #include "insn-config.h"
      34              : #include "recog.h"            /* FIXME: for insn_data */
      35              : #include "fold-const.h"
      36              : #include "stor-layout.h"
      37              : #include "tree-eh.h"
      38              : #include "gimplify.h"
      39              : #include "gimple-iterator.h"
      40              : #include "gimple-fold.h"
      41              : #include "gimplify-me.h"
      42              : #include "cfgloop.h"
      43              : #include "tree-vectorizer.h"
      44              : #include "dumpfile.h"
      45              : #include "builtins.h"
      46              : #include "internal-fn.h"
      47              : #include "case-cfn-macros.h"
      48              : #include "fold-const-call.h"
      49              : #include "attribs.h"
      50              : #include "cgraph.h"
      51              : #include "omp-simd-clone.h"
      52              : #include "predict.h"
      53              : #include "tree-vector-builder.h"
      54              : #include "tree-ssa-loop-ivopts.h"
      55              : #include "vec-perm-indices.h"
      56              : #include "gimple-range.h"
      57              : #include "alias.h"
      58              : 
      59              : 
      60              : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      61              :    in the first operand.  Disentangling this is future work, the
      62              :    IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
      63              : 
      64              : 
      65              : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      66              :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      67              : 
      68              : bool
      69     12074889 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      70              : {
      71     12074889 :   int_range_max vr;
      72     12074889 :   tree vr_min, vr_max;
      73     24149778 :   get_range_query (cfun)->range_of_expr (vr, var);
      74     12074889 :   if (vr.undefined_p ())
      75           84 :     vr.set_varying (TREE_TYPE (var));
      76     12074889 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      77     12074889 :   *min_value = wi::to_wide (vr_min);
      78     12074889 :   *max_value = wi::to_wide (vr_max);
      79     12074889 :   wide_int nonzero = get_nonzero_bits (var);
      80     12074889 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      81     12074889 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      82              :                                          nonzero, sgn) == VR_RANGE)
      83              :     {
      84      5958589 :       if (dump_enabled_p ())
      85              :         {
      86        87644 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      87        87644 :           dump_printf (MSG_NOTE, " has range [");
      88        87644 :           dump_hex (MSG_NOTE, *min_value);
      89        87644 :           dump_printf (MSG_NOTE, ", ");
      90        87644 :           dump_hex (MSG_NOTE, *max_value);
      91        87644 :           dump_printf (MSG_NOTE, "]\n");
      92              :         }
      93      5958589 :       return true;
      94              :     }
      95              :   else
      96              :     {
      97      6116300 :       if (dump_enabled_p ())
      98              :         {
      99        67122 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     100        67122 :           dump_printf (MSG_NOTE, " has no range info\n");
     101              :         }
     102      6116300 :       return false;
     103              :     }
     104     12074889 : }
     105              : 
     106              : /* Report that we've found an instance of pattern PATTERN in
     107              :    statement STMT.  */
     108              : 
     109              : static void
     110      1233359 : vect_pattern_detected (const char *name, gimple *stmt)
     111              : {
     112      1233359 :   if (dump_enabled_p ())
     113        24971 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     114      1233359 : }
     115              : 
     116              : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     117              :    return the pattern statement's stmt_vec_info.  Set its vector type to
     118              :    VECTYPE if it doesn't have one already.  */
     119              : 
     120              : static stmt_vec_info
     121      2370947 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     122              :                         stmt_vec_info orig_stmt_info, tree vectype)
     123              : {
     124      2370947 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     125      2370947 :   if (pattern_stmt_info == NULL)
     126      1369498 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     127      2370947 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     128              : 
     129      2370947 :   pattern_stmt_info->pattern_stmt_p = true;
     130      2370947 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     131      2370947 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     132      2370947 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     133      2370947 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     134              :     {
     135      2340839 :       gcc_assert (!vectype
     136              :                   || is_a <gcond *> (pattern_stmt)
     137              :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     138              :                       == vect_use_mask_type_p (orig_stmt_info)));
     139      1380193 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     140      1380193 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
     141              :     }
     142      2370947 :   return pattern_stmt_info;
     143              : }
     144              : 
     145              : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     146              :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     147              :    have one already.  */
     148              : 
     149              : static void
     150      1012417 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     151              :                        stmt_vec_info orig_stmt_info, tree vectype)
     152              : {
     153      1012417 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     154      1012417 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     155            0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     156       981775 : }
     157              : 
     158              : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     159              :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     160              :    be different from the vector type of the final pattern statement.
     161              :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     162              :    from which it was derived.  */
     163              : 
     164              : static inline void
     165      1318046 : append_pattern_def_seq (vec_info *vinfo,
     166              :                         stmt_vec_info stmt_info, gimple *new_stmt,
     167              :                         tree vectype = NULL_TREE,
     168              :                         tree scalar_type_for_mask = NULL_TREE)
     169              : {
     170      2007327 :   gcc_assert (!scalar_type_for_mask
     171              :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
     172      1318046 :   if (vectype)
     173              :     {
     174       990993 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     175       990993 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     176       990993 :       if (scalar_type_for_mask)
     177       628765 :         new_stmt_info->mask_precision
     178      1257530 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     179              :     }
     180      1318046 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     181              :                                       new_stmt);
     182      1318046 : }
     183              : 
     184              : 
     185              : /* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
     186              :    statements are not vectorized but are materialized as scalar in the loop
     187              :    preheader.  */
     188              : 
     189              : static inline void
     190         1373 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
     191              : {
     192         1373 :   gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
     193              : }
     194              : 
     195              : /* The caller wants to perform new operations on vect_external variable
     196              :    VAR, so that the result of the operations would also be vect_external.
     197              :    Return the edge on which the operations can be performed, if one exists.
     198              :    Return null if the operations should instead be treated as part of
     199              :    the pattern that needs them.  */
     200              : 
     201              : static edge
     202         8560 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     203              : {
     204         8560 :   edge e = NULL;
     205         8560 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     206              :     {
     207          819 :       e = loop_preheader_edge (loop_vinfo->loop);
     208          819 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))
     209              :         {
     210          623 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     211          623 :           if (bb == NULL
     212          623 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     213              :             e = NULL;
     214              :         }
     215              :     }
     216         8560 :   return e;
     217              : }
     218              : 
     219              : /* Return true if the target supports a vector version of CODE,
     220              :    where CODE is known to map to a direct optab with the given SUBTYPE.
     221              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     222              :    specifies the type of the scalar result.
     223              : 
     224              :    If CODE allows the inputs and outputs to have different type
     225              :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     226              :    than the output mode that determines the appropriate target pattern.
     227              :    Operand 0 of the target pattern then specifies the mode that the output
     228              :    must have.
     229              : 
     230              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     231              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     232              :    is nonnull.  */
     233              : 
     234              : static bool
     235          813 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     236              :                                  tree itype, tree *vecotype_out,
     237              :                                  tree *vecitype_out = NULL,
     238              :                                  enum optab_subtype subtype = optab_default)
     239              : {
     240          813 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     241          813 :   if (!vecitype)
     242              :     return false;
     243              : 
     244          813 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     245          813 :   if (!vecotype)
     246              :     return false;
     247              : 
     248          813 :   optab optab = optab_for_tree_code (code, vecitype, subtype);
     249          813 :   if (!optab)
     250              :     return false;
     251              : 
     252          813 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
     253          813 :   if (icode == CODE_FOR_nothing
     254          813 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
     255          394 :     return false;
     256              : 
     257          419 :   *vecotype_out = vecotype;
     258          419 :   if (vecitype_out)
     259          412 :     *vecitype_out = vecitype;
     260              :   return true;
     261              : }
     262              : 
     263              : /* Return true if the target supports a vector version of CODE,
     264              :    where CODE is known to map to a conversion optab with the given SUBTYPE.
     265              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     266              :    specifies the type of the scalar result.
     267              : 
     268              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     269              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     270              :    is nonnull.  */
     271              : 
     272              : static bool
     273         3635 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
     274              :                                  tree itype, tree *vecotype_out,
     275              :                                  tree *vecitype_out = NULL,
     276              :                                  enum optab_subtype subtype = optab_default)
     277              : {
     278         3635 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     279         3635 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     280         3635 :   if (!vecitype || !vecotype)
     281              :     return false;
     282              : 
     283         3373 :   if (!directly_supported_p (code, vecotype, vecitype, subtype))
     284              :     return false;
     285              : 
     286          698 :   *vecotype_out = vecotype;
     287          698 :   if (vecitype_out)
     288          698 :     *vecitype_out = vecitype;
     289              :   return true;
     290              : }
     291              : 
     292              : /* Round bit precision PRECISION up to a full element.  */
     293              : 
     294              : static unsigned int
     295      3197592 : vect_element_precision (unsigned int precision)
     296              : {
     297            0 :   precision = 1 << ceil_log2 (precision);
     298      4809179 :   return MAX (precision, BITS_PER_UNIT);
     299              : }
     300              : 
     301              : /* If OP is defined by a statement that's being considered for vectorization,
     302              :    return information about that statement, otherwise return NULL.  */
     303              : 
     304              : static stmt_vec_info
     305       330307 : vect_get_internal_def (vec_info *vinfo, tree op)
     306              : {
     307       330307 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     308       330307 :   if (def_stmt_info
     309       316780 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     310       300940 :     return vect_stmt_to_vectorize (def_stmt_info);
     311              :   return NULL;
     312              : }
     313              : 
     314              : /* Holds information about an input operand after some sign changes
     315              :    and type promotions have been peeled away.  */
     316              : class vect_unpromoted_value {
     317              : public:
     318              :   vect_unpromoted_value ();
     319              : 
     320              :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
     321              : 
     322              :   /* The value obtained after peeling away zero or more casts.  */
     323              :   tree op;
     324              : 
     325              :   /* The type of OP.  */
     326              :   tree type;
     327              : 
     328              :   /* The definition type of OP.  */
     329              :   vect_def_type dt;
     330              : 
     331              :   /* If OP is the result of peeling at least one cast, and if the cast
     332              :      of OP itself is a vectorizable statement, CASTER identifies that
     333              :      statement, otherwise it is null.  */
     334              :   stmt_vec_info caster;
     335              : };
     336              : 
     337    291680022 : inline vect_unpromoted_value::vect_unpromoted_value ()
     338    291680022 :   : op (NULL_TREE),
     339    291680022 :     type (NULL_TREE),
     340    291680022 :     dt (vect_uninitialized_def),
     341      3209697 :     caster (NULL)
     342              : {
     343              : }
     344              : 
     345              : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     346              :    statement that casts it to CASTER_IN.  */
     347              : 
     348              : inline void
     349     11246474 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     350              :                                stmt_vec_info caster_in)
     351              : {
     352     11246474 :   op = op_in;
     353     11246474 :   type = TREE_TYPE (op);
     354     11246474 :   dt = dt_in;
     355     11246474 :   caster = caster_in;
     356     11246474 : }
     357              : 
     358              : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     359              :    to reach some vectorizable inner operand OP', continuing as long as it
     360              :    is possible to convert OP' back to OP using a possible sign change
     361              :    followed by a possible promotion P.  Return this OP', or null if OP is
     362              :    not a vectorizable SSA name.  If there is a promotion P, describe its
     363              :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     364              :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     365              :    have more than one user.
     366              : 
     367              :    A successful return means that it is possible to go from OP' to OP
     368              :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     369              :    whereas the cast from UNPROM to OP might be a promotion, a sign
     370              :    change, or a nop.
     371              : 
     372              :    E.g. say we have:
     373              : 
     374              :        signed short *ptr = ...;
     375              :        signed short C = *ptr;
     376              :        unsigned short B = (unsigned short) C;    // sign change
     377              :        signed int A = (signed int) B;            // unsigned promotion
     378              :        ...possible other uses of A...
     379              :        unsigned int OP = (unsigned int) A;       // sign change
     380              : 
     381              :    In this case it's possible to go directly from C to OP using:
     382              : 
     383              :        OP = (unsigned int) (unsigned short) C;
     384              :             +------------+ +--------------+
     385              :                promotion      sign change
     386              : 
     387              :    so OP' would be C.  The input to the promotion is B, so UNPROM
     388              :    would describe B.  */
     389              : 
     390              : static tree
     391      8289724 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     392              :                                       vect_unpromoted_value *unprom,
     393              :                                       bool *single_use_p = NULL)
     394              : {
     395      8289724 :   tree op_type = TREE_TYPE (op);
     396      8289724 :   if (!INTEGRAL_TYPE_P (op_type))
     397              :     return NULL_TREE;
     398              : 
     399      8239087 :   tree res = NULL_TREE;
     400      8239087 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     401      8239087 :   unsigned int min_precision = orig_precision;
     402      8239087 :   stmt_vec_info caster = NULL;
     403      9864598 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     404              :     {
     405              :       /* See whether OP is simple enough to vectorize.  */
     406      9648678 :       stmt_vec_info def_stmt_info;
     407      9648678 :       gimple *def_stmt;
     408      9648678 :       vect_def_type dt;
     409      9648678 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     410              :         break;
     411              : 
     412              :       /* If OP is the input of a demotion, skip over it to see whether
     413              :          OP is itself the result of a promotion.  If so, the combined
     414              :          effect of the promotion and the demotion might fit the required
     415              :          pattern, otherwise neither operation fits.
     416              : 
     417              :          This copes with cases such as the result of an arithmetic
     418              :          operation being truncated before being stored, and where that
     419              :          arithmetic operation has been recognized as an over-widened one.  */
     420      9632654 :       if (TYPE_PRECISION (op_type) <= min_precision)
     421              :         {
     422              :           /* Use OP as the UNPROM described above if we haven't yet
     423              :              found a promotion, or if using the new input preserves the
     424              :              sign of the previous promotion.  */
     425      9510928 :           if (!res
     426      1387238 :               || TYPE_PRECISION (unprom->type) == orig_precision
     427        38738 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
     428      9546844 :               || (TYPE_UNSIGNED (op_type)
     429        25246 :                   && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
     430              :             {
     431      9475448 :               unprom->set_op (op, dt, caster);
     432      9475448 :               min_precision = TYPE_PRECISION (op_type);
     433              :             }
     434              :           /* Stop if we've already seen a promotion and if this
     435              :              conversion does more than change the sign.  */
     436        35480 :           else if (TYPE_PRECISION (op_type)
     437        35480 :                    != TYPE_PRECISION (unprom->type))
     438              :             break;
     439              : 
     440              :           /* The sequence now extends to OP.  */
     441              :           res = op;
     442              :         }
     443              : 
     444              :       /* See whether OP is defined by a cast.  Record it as CASTER if
     445              :          the cast is potentially vectorizable.  */
     446      9632613 :       if (!def_stmt)
     447              :         break;
     448      9433763 :       caster = def_stmt_info;
     449              : 
     450              :       /* Ignore pattern statements, since we don't link uses for them.  */
     451      9433763 :       if (caster
     452      9433763 :           && single_use_p
     453      1943421 :           && !STMT_VINFO_RELATED_STMT (caster)
     454     11233781 :           && !has_single_use (res))
     455      1088013 :         *single_use_p = false;
     456              : 
     457     17456930 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     458      5906550 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     459              :         break;
     460              : 
     461              :       /* Continue with the input to the cast.  */
     462      1625511 :       op = gimple_assign_rhs1 (def_stmt);
     463      1625511 :       op_type = TREE_TYPE (op);
     464              :     }
     465              :   return res;
     466              : }
     467              : 
     468              : /* OP is an integer operand to an operation that returns TYPE, and we
     469              :    want to treat the operation as a widening one.  So far we can treat
     470              :    it as widening from *COMMON_TYPE.
     471              : 
     472              :    Return true if OP is suitable for such a widening operation,
     473              :    either widening from *COMMON_TYPE or from some supertype of it.
     474              :    Update *COMMON_TYPE to the supertype in the latter case.
     475              : 
     476              :    SHIFT_P is true if OP is a shift amount.  */
     477              : 
     478              : static bool
     479       300870 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     480              :                             tree *common_type)
     481              : {
     482              :   /* Calculate the minimum precision required by OP, without changing
     483              :      the sign of either operand.  */
     484       300870 :   unsigned int precision;
     485       300870 :   if (shift_p)
     486              :     {
     487        13011 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     488              :         return false;
     489        10433 :       precision = TREE_INT_CST_LOW (op);
     490              :     }
     491              :   else
     492              :     {
     493       287859 :       precision = wi::min_precision (wi::to_widest (op),
     494       287859 :                                      TYPE_SIGN (*common_type));
     495       287859 :       if (precision * 2 > TYPE_PRECISION (type))
     496              :         return false;
     497              :     }
     498              : 
     499              :   /* If OP requires a wider type, switch to that type.  The checks
     500              :      above ensure that this is still narrower than the result.  */
     501       284628 :   precision = vect_element_precision (precision);
     502       284628 :   if (TYPE_PRECISION (*common_type) < precision)
     503         6443 :     *common_type = build_nonstandard_integer_type
     504         6443 :       (precision, TYPE_UNSIGNED (*common_type));
     505              :   return true;
     506              : }
     507              : 
     508              : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     509              :    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
     510              : 
     511              : static bool
     512        44671 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     513              : {
     514        44671 :   if (types_compatible_p (*common_type, new_type))
     515              :     return true;
     516              : 
     517              :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     518         7720 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     519         7720 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     520              :     return true;
     521              : 
     522              :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     523         7096 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     524         7096 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     525              :     {
     526          336 :       *common_type = new_type;
     527          336 :       return true;
     528              :     }
     529              : 
     530              :   /* We have mismatched signs, with the signed type being
     531              :      no wider than the unsigned type.  In this case we need
     532              :      a wider signed type.  */
     533         6760 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     534              :                                 TYPE_PRECISION (new_type));
     535         6760 :   precision *= 2;
     536              : 
     537         6760 :   if (precision * 2 > TYPE_PRECISION (type))
     538              :     return false;
     539              : 
     540           43 :   *common_type = build_nonstandard_integer_type (precision, false);
     541           43 :   return true;
     542              : }
     543              : 
     544              : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     545              :    in which each node either performs CODE or WIDENED_CODE, and where
     546              :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     547              :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     548              :    CODE and WIDENED_CODE are some sort of shift.
     549              : 
     550              :    If STMT_INFO is such a tree, return the number of leaf operands
     551              :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     552              :    to a type that (a) is narrower than the result of STMT_INFO and
     553              :    (b) can hold all leaf operand values.
     554              : 
     555              :    If SUBTYPE then allow that the signs of the operands
     556              :    may differ in signs but not in precision.  SUBTYPE is updated to reflect
     557              :    this.
     558              : 
     559              :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     560              :    exists.  */
     561              : 
     562              : static unsigned int
     563    122973426 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     564              :                       code_helper widened_code, bool shift_p,
     565              :                       unsigned int max_nops,
     566              :                       vect_unpromoted_value *unprom, tree *common_type,
     567              :                       enum optab_subtype *subtype = NULL)
     568              : {
     569              :   /* Check for an integer operation with the right code.  */
     570    122973426 :   gimple* stmt = stmt_info->stmt;
     571    122973426 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     572              :     return 0;
     573              : 
     574     99252546 :   code_helper rhs_code;
     575     99252546 :   if (is_gimple_assign (stmt))
     576     84999784 :     rhs_code = gimple_assign_rhs_code (stmt);
     577     14252762 :   else if (is_gimple_call (stmt))
     578     14252762 :     rhs_code = gimple_call_combined_fn (stmt);
     579              :   else
     580              :     return 0;
     581              : 
     582     99252546 :   if (rhs_code != code
     583     99252546 :       && rhs_code != widened_code)
     584              :     return 0;
     585              : 
     586      6255410 :   tree lhs = gimple_get_lhs (stmt);
     587      6255410 :   tree type = TREE_TYPE (lhs);
     588      6255410 :   if (!INTEGRAL_TYPE_P (type))
     589              :     return 0;
     590              : 
     591              :   /* Assume that both operands will be leaf operands.  */
     592      5678676 :   max_nops -= 2;
     593              : 
     594              :   /* Check the operands.  */
     595      5678676 :   unsigned int next_op = 0;
     596      6426574 :   for (unsigned int i = 0; i < 2; ++i)
     597              :     {
     598      6103569 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     599      6103569 :       unsigned int nops = 1;
     600      6103569 :       tree op = gimple_arg (stmt, i);
     601      6103569 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     602              :         {
     603              :           /* We already have a common type from earlier operands.
     604              :              Update it to account for OP.  */
     605       300870 :           this_unprom->set_op (op, vect_constant_def);
     606       300870 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     607              :             return 0;
     608              :         }
     609              :       else
     610              :         {
     611              :           /* Only allow shifts by constants.  */
     612      5802699 :           if (shift_p && i == 1)
     613              :             return 0;
     614              : 
     615      5796574 :           if (rhs_code != code)
     616              :             {
     617              :               /* If rhs_code is widened_code, don't look through further
     618              :                  possible promotions, there is a promotion already embedded
     619              :                  in the WIDEN_*_EXPR.  */
     620         1624 :               if (TREE_CODE (op) != SSA_NAME
     621         1624 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     622            0 :                 return 0;
     623              : 
     624         1624 :               stmt_vec_info def_stmt_info;
     625         1624 :               gimple *def_stmt;
     626         1624 :               vect_def_type dt;
     627         1624 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     628              :                                        &def_stmt))
     629              :                 return 0;
     630         1624 :               this_unprom->set_op (op, dt, NULL);
     631              :             }
     632      5794950 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     633              :                                                           this_unprom))
     634              :             return 0;
     635              : 
     636      5685253 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     637              :             {
     638              :               /* The operand isn't widened.  If STMT_INFO has the code
     639              :                  for an unwidened operation, recursively check whether
     640              :                  this operand is a node of the tree.  */
     641      5211502 :               if (rhs_code != code
     642      5211502 :                   || max_nops == 0
     643      5211968 :                   || this_unprom->dt != vect_internal_def)
     644              :                 return 0;
     645              : 
     646              :               /* Give back the leaf slot allocated above now that we're
     647              :                  not treating this as a leaf operand.  */
     648          466 :               max_nops += 1;
     649              : 
     650              :               /* Recursively process the definition of the operand.  */
     651          466 :               stmt_vec_info def_stmt_info
     652          466 :                 = vect_get_internal_def (vinfo, this_unprom->op);
     653              : 
     654          466 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     655              :                                            widened_code, shift_p, max_nops,
     656              :                                            this_unprom, common_type,
     657              :                                            subtype);
     658          466 :               if (nops == 0)
     659              :                 return 0;
     660              : 
     661          311 :               max_nops -= nops;
     662              :             }
     663              :           else
     664              :             {
     665              :               /* Make sure that the operand is narrower than the result.  */
     666       473751 :               if (TYPE_PRECISION (this_unprom->type) * 2
     667       473751 :                   > TYPE_PRECISION (type))
     668              :                 return 0;
     669              : 
     670              :               /* Update COMMON_TYPE for the new operand.  */
     671       469391 :               if (i == 0)
     672       424720 :                 *common_type = this_unprom->type;
     673        44671 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     674              :                                                  common_type))
     675              :                 {
     676         6717 :                   if (subtype)
     677              :                     {
     678              :                       /* See if we can sign extend the smaller type.  */
     679          285 :                       if (TYPE_PRECISION (this_unprom->type)
     680          285 :                           > TYPE_PRECISION (*common_type))
     681           27 :                         *common_type = this_unprom->type;
     682          285 :                       *subtype = optab_vector_mixed_sign;
     683              :                     }
     684              :                   else
     685              :                     return 0;
     686              :                 }
     687              :             }
     688              :         }
     689       747898 :       next_op += nops;
     690              :     }
     691              :   return next_op;
     692              : }
     693              : 
     694              : /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
     695              :    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
     696              : 
     697              : static tree
     698      2011124 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     699              : {
     700            0 :   return make_temp_ssa_name (type, stmt, "patt");
     701              : }
     702              : 
     703              : /* STMT2_INFO describes a type conversion that could be split into STMT1
     704              :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     705              :    input.  Try to do this using pattern statements, returning true on
     706              :    success.  */
     707              : 
     708              : static bool
     709        31102 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     710              :                       gimple *stmt1, tree vectype)
     711              : {
     712        31102 :   if (is_pattern_stmt_p (stmt2_info))
     713              :     {
     714              :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     715              :          the pattern is attached.  */
     716          460 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     717          460 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     718              : 
     719          460 :       if (dump_enabled_p ())
     720           19 :         dump_printf_loc (MSG_NOTE, vect_location,
     721              :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     722              : 
     723              :       /* Since STMT2_INFO is a pattern statement, we can change it
     724              :          in-situ without worrying about changing the code for the
     725              :          containing block.  */
     726          460 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     727              : 
     728          460 :       if (dump_enabled_p ())
     729              :         {
     730           19 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     731           19 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     732              :                            stmt2_info->stmt);
     733              :         }
     734              : 
     735          460 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     736          460 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     737              :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     738              :            to the end of the definition sequence.  */
     739          457 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     740              :       else
     741              :         {
     742              :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     743              :              before it.  */
     744            3 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     745            3 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     746              :         }
     747          460 :       return true;
     748              :     }
     749              :   else
     750              :     {
     751              :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     752              :          two-statement pattern now.  */
     753        30642 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     754        30642 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     755        30642 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     756        30642 :       if (!lhs_vectype)
     757              :         return false;
     758              : 
     759        30642 :       if (dump_enabled_p ())
     760         1927 :         dump_printf_loc (MSG_NOTE, vect_location,
     761              :                          "Splitting statement: %G", stmt2_info->stmt);
     762              : 
     763              :       /* Add STMT1 as a singleton pattern definition sequence.  */
     764        30642 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     765        30642 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     766        30642 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     767              : 
     768              :       /* Build the second of the two pattern statements.  */
     769        30642 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     770        30642 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     771        30642 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     772              : 
     773        30642 :       if (dump_enabled_p ())
     774              :         {
     775         1927 :           dump_printf_loc (MSG_NOTE, vect_location,
     776              :                            "into pattern statements: %G", stmt1);
     777         1927 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     778              :                            (gimple *) new_stmt2);
     779              :         }
     780              : 
     781        30642 :       return true;
     782              :     }
     783              : }
     784              : 
     785              : /* Look for the following pattern
     786              :         X = x[i]
     787              :         Y = y[i]
     788              :         DIFF = X - Y
     789              :         DAD = ABS_EXPR<DIFF>
     790              : 
     791              :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     792              :    HALF_TYPE and UNPROM will be set should the statement be found to
     793              :    be a widened operation.
     794              :    DIFF_STMT will be set to the MINUS_EXPR
     795              :    statement that precedes the ABS_STMT if it is a MINUS_EXPR..
     796              :  */
     797              : static bool
     798     20888838 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     799              :                                 tree *half_type,
     800              :                                 vect_unpromoted_value unprom[2],
     801              :                                 gassign **diff_stmt)
     802              : {
     803     20888838 :   if (!abs_stmt)
     804              :     return false;
     805              : 
     806              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     807              :      inside the loop (in case we are analyzing an outer-loop).  */
     808     20888838 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     809     20888838 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     810              :     return false;
     811              : 
     812        21734 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     813        21734 :   tree abs_type = TREE_TYPE (abs_oprnd);
     814        21734 :   if (!abs_oprnd)
     815              :     return false;
     816        15936 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     817         6094 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     818        27680 :       || TYPE_UNSIGNED (abs_type))
     819              :     return false;
     820              : 
     821              :   /* Peel off conversions from the ABS input.  This can involve sign
     822              :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     823              :      or signed promotion, but it can't include unsigned promotion.
     824              :      (Note that ABS of an unsigned promotion should have been folded
     825              :      away before now anyway.)  */
     826         5946 :   vect_unpromoted_value unprom_diff;
     827         5946 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     828              :                                                     &unprom_diff);
     829         5946 :   if (!abs_oprnd)
     830              :     return false;
     831         5650 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     832         5650 :       && TYPE_UNSIGNED (unprom_diff.type))
     833              :     return false;
     834              : 
     835              :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     836         5650 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     837         5650 :   if (!diff_stmt_vinfo)
     838              :     return false;
     839              : 
     840         5479 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     841         5479 :   if (diff_stmt && diff
     842         4043 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     843         7584 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     844          275 :     *diff_stmt = diff;
     845              : 
     846              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     847              :      inside the loop (in case we are analyzing an outer-loop).  */
     848         5479 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     849         5479 :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     850              :                             false, 2, unprom, half_type))
     851              :     return true;
     852              : 
     853              :   return false;
     854              : }
     855              : 
     856              : /* Convert UNPROM to TYPE and return the result, adding new statements
     857              :    to STMT_INFO's pattern definition statements if no better way is
     858              :    available.  VECTYPE is the vector form of TYPE.
     859              : 
     860              :    If SUBTYPE then convert the type based on the subtype.  */
     861              : 
     862              : static tree
     863       461625 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     864              :                     vect_unpromoted_value *unprom, tree vectype,
     865              :                     enum optab_subtype subtype = optab_default)
     866              : {
     867              :   /* Update the type if the signs differ.  */
     868       461625 :   if (subtype == optab_vector_mixed_sign)
     869              :     {
     870          322 :       gcc_assert (!TYPE_UNSIGNED (type));
     871          322 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     872              :         {
     873          161 :           type = unsigned_type_for (type);
     874          161 :           vectype = unsigned_type_for (vectype);
     875              :         }
     876              :     }
     877              : 
     878              :   /* Check for a no-op conversion.  */
     879       461625 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     880       159814 :     return unprom->op;
     881              : 
     882              :   /* Allow the caller to create constant vect_unpromoted_values.  */
     883       301811 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     884       188122 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     885              : 
     886       113689 :   tree input = unprom->op;
     887       113689 :   if (unprom->caster)
     888              :     {
     889        62122 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     890        62122 :       tree lhs_type = TREE_TYPE (lhs);
     891              : 
     892              :       /* If the result of the existing cast is the right width, use it
     893              :          instead of the source of the cast.  */
     894        62122 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     895              :         input = lhs;
     896              :       /* If the precision we want is between the source and result
     897              :          precisions of the existing cast, try splitting the cast into
     898              :          two and tapping into a mid-way point.  */
     899        60056 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     900        60056 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     901              :         {
     902              :           /* In order to preserve the semantics of the original cast,
     903              :              give the mid-way point the same signedness as the input value.
     904              : 
     905              :              It would be possible to use a signed type here instead if
     906              :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     907              :              make the sign of the midtype sensitive to the order in
     908              :              which we process the statements, since the signedness of
     909              :              TYPE is the signedness required by just one of possibly
     910              :              many users.  Also, unsigned promotions are usually as cheap
     911              :              as or cheaper than signed ones, so it's better to keep an
     912              :              unsigned promotion.  */
     913        31102 :           tree midtype = build_nonstandard_integer_type
     914        31102 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     915        31102 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     916        31102 :           if (vec_midtype)
     917              :             {
     918        31102 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     919        31102 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     920              :                                                        unprom->op);
     921        31102 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     922              :                                          vec_midtype))
     923            0 :                 append_pattern_def_seq (vinfo, stmt_info,
     924              :                                         new_stmt, vec_midtype);
     925              :             }
     926              :         }
     927              : 
     928              :       /* See if we can reuse an existing result.  */
     929        62122 :       if (types_compatible_p (type, TREE_TYPE (input)))
     930              :         return input;
     931              :     }
     932              : 
     933              :   /* We need a new conversion statement.  */
     934        91202 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     935        91202 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     936              : 
     937              :   /* If OP is an external value, see if we can insert the new statement
     938              :      on an incoming edge.  */
     939        91202 :   if (input == unprom->op && unprom->dt == vect_external_def)
     940         8545 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     941              :       {
     942          804 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     943          804 :         gcc_assert (!new_bb);
     944              :         return new_op;
     945              :       }
     946              : 
     947              :   /* As a (common) last resort, add the statement to the pattern itself.  */
     948        90398 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     949        90398 :   return new_op;
     950              : }
     951              : 
     952              : /* Invoke vect_convert_input for N elements of UNPROM and store the
     953              :    result in the corresponding elements of RESULT.
     954              : 
     955              :    If SUBTYPE then convert the type based on the subtype.  */
     956              : 
     957              : static void
     958       234677 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     959              :                      tree *result, tree type, vect_unpromoted_value *unprom,
     960              :                      tree vectype, enum optab_subtype subtype = optab_default)
     961              : {
     962       696239 :   for (unsigned int i = 0; i < n; ++i)
     963              :     {
     964              :       unsigned int j;
     965       688010 :       for (j = 0; j < i; ++j)
     966       226885 :         if (unprom[j].op == unprom[i].op)
     967              :           break;
     968              : 
     969       461562 :       if (j < i)
     970          437 :         result[i] = result[j];
     971              :       else
     972       461125 :         result[i] = vect_convert_input (vinfo, stmt_info,
     973       461125 :                                         type, &unprom[i], vectype, subtype);
     974              :     }
     975       234677 : }
     976              : 
     977              : /* The caller has created a (possibly empty) sequence of pattern definition
     978              :    statements followed by a single statement PATTERN_STMT.  Cast the result
     979              :    of this final statement to TYPE.  If a new statement is needed, add
     980              :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     981              :    and return the new statement, otherwise return PATTERN_STMT as-is.
     982              :    VECITYPE is the vector form of PATTERN_STMT's result type.  */
     983              : 
     984              : static gimple *
     985       261267 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     986              :                      gimple *pattern_stmt, tree vecitype)
     987              : {
     988       261267 :   tree lhs = gimple_get_lhs (pattern_stmt);
     989       261267 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
     990              :     {
     991       233711 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
     992       233711 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     993       233711 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
     994              :     }
     995       261267 :   return pattern_stmt;
     996              : }
     997              : 
     998              : /* Return true if STMT_VINFO describes a reduction for which reassociation
     999              :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1000              :    a reduction chain and optimistically assume that all statements
    1001              :    except the last allow reassociation.
    1002              :    Also require it to have code CODE and to be a reduction
    1003              :    in the outermost loop.  When returning true, store the operands in
    1004              :    *OP0_OUT and *OP1_OUT.  */
    1005              : 
    1006              : static bool
    1007     91741802 : vect_reassociating_reduction_p (vec_info *vinfo,
    1008              :                                 stmt_vec_info stmt_info, tree_code code,
    1009              :                                 tree *op0_out, tree *op1_out)
    1010              : {
    1011     91741802 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1012     13345145 :   if (!loop_info)
    1013              :     return false;
    1014              : 
    1015     13345145 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1016     14569058 :   if (!assign || gimple_assign_rhs_code (assign) != code)
    1017              :     return false;
    1018              : 
    1019              :   /* We don't allow changing the order of the computation in the inner-loop
    1020              :      when doing outer-loop vectorization.  */
    1021      2610421 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1022     94194233 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1023              :     return false;
    1024              : 
    1025      2557312 :   if (!vect_is_reduction (stmt_info))
    1026              :     return false;
    1027              : 
    1028       169465 :   if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1029       169465 :                                    code))
    1030              :     return false;
    1031              : 
    1032       157990 :   *op0_out = gimple_assign_rhs1 (assign);
    1033       157990 :   *op1_out = gimple_assign_rhs2 (assign);
    1034       157990 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1035        63181 :     std::swap (*op0_out, *op1_out);
    1036              :   return true;
    1037              : }
    1038              : 
    1039              : /* Return true iff the target has a vector optab implementing the operation
    1040              :    CODE on type VECTYPE with SUBTYPE.  */
    1041              : 
    1042              : static bool
    1043       830187 : target_has_vecop_for_code (tree_code code, tree vectype,
    1044              :                            enum optab_subtype subtype = optab_vector)
    1045              : {
    1046       830187 :   optab voptab = optab_for_tree_code (code, vectype, subtype);
    1047       830187 :   return voptab && can_implement_p (voptab, TYPE_MODE (vectype));
    1048              : }
    1049              : 
    1050              : /* match.pd function to match
    1051              :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1052              :    with conditions:
    1053              :    1) @1, @2, c, d, a, b are all integral type.
    1054              :    2) There's single_use for both @1 and @2.
    1055              :    3) a, c have same precision.
    1056              :    4) c and @1 have different precision.
    1057              :    5) c, d are the same type or they can differ in sign when convert is
    1058              :    truncation.
    1059              : 
    1060              :    record a and c and d and @3.  */
    1061              : 
    1062              : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1063              : 
    1064              : /* Function vect_recog_cond_expr_convert
    1065              : 
    1066              :    Try to find the following pattern:
    1067              : 
    1068              :    TYPE_AB A,B;
    1069              :    TYPE_CD C,D;
    1070              :    TYPE_E E;
    1071              :    TYPE_E op_true = (TYPE_E) A;
    1072              :    TYPE_E op_false = (TYPE_E) B;
    1073              : 
    1074              :    E = C cmp D ? op_true : op_false;
    1075              : 
    1076              :    where
    1077              :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1078              :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1079              :    single_use of op_true and op_false.
    1080              :    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
    1081              : 
    1082              :    Input:
    1083              : 
    1084              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1085              :    here it starts with E = c cmp D ? op_true : op_false;
    1086              : 
    1087              :    Output:
    1088              : 
    1089              :    TYPE1 E' = C cmp D ? A : B;
    1090              :    TYPE3 E = (TYPE3) E';
    1091              : 
    1092              :    There may extra nop_convert for A or B to handle different signness.
    1093              : 
    1094              :    * TYPE_OUT: The vector type of the output of this pattern.
    1095              : 
    1096              :    * Return value: A new stmt that will be used to replace the sequence of
    1097              :    stmts that constitute the pattern. In this case it will be:
    1098              :    E = (TYPE3)E';
    1099              :    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    1100              : 
    1101              : static gimple *
    1102     30649087 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1103              :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1104              : {
    1105     30649087 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1106     20973798 :   tree lhs, match[4], temp, type, new_lhs, op2, op1;
    1107     20973798 :   gimple *cond_stmt;
    1108     20973798 :   gimple *pattern_stmt;
    1109     30648984 :   enum tree_code code = NOP_EXPR;
    1110              : 
    1111     20973798 :   if (!last_stmt)
    1112              :     return NULL;
    1113              : 
    1114     20973798 :   lhs = gimple_assign_lhs (last_stmt);
    1115              : 
    1116              :   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
    1117              :      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
    1118     20973798 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1119              :     return NULL;
    1120              : 
    1121          103 :   if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    1122           20 :     code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
    1123           83 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
    1124            0 :     code = FIX_TRUNC_EXPR;
    1125              : 
    1126          103 :   op1 = match[1];
    1127          103 :   op2 = match[2];
    1128          103 :   type = TREE_TYPE (op1);
    1129              :   /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
    1130              :      SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
    1131              :      Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
    1132              :      or CONVERT_EXPR.  */
    1133          103 :   if (TREE_CODE (op1) == REAL_CST)
    1134              :     {
    1135           20 :       op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
    1136           20 :       type = TREE_TYPE (op2);
    1137           20 :       if (op1 == NULL_TREE)
    1138              :         return NULL;
    1139              :     }
    1140           83 :   else if (TREE_CODE (op2) == REAL_CST)
    1141              :     {
    1142            0 :       op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
    1143            0 :       if (op2 == NULL_TREE)
    1144              :         return NULL;
    1145              :     }
    1146           83 :   else if (code == NOP_EXPR)
    1147              :     {
    1148           83 :       if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    1149              :         {
    1150           83 :           op2 = vect_recog_temp_ssa_var (type, NULL);
    1151           83 :           gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1152           83 :           append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt);
    1153              :         }
    1154              :     }
    1155              : 
    1156          103 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1157              : 
    1158          103 :   temp = vect_recog_temp_ssa_var (type, NULL);
    1159          103 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1160              :                                                  op1, op2));
    1161          103 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt);
    1162          103 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1163          103 :   pattern_stmt = gimple_build_assign (new_lhs, code, temp);
    1164          103 :   *type_out = NULL_TREE;
    1165              : 
    1166          103 :   if (dump_enabled_p ())
    1167           20 :     dump_printf_loc (MSG_NOTE, vect_location,
    1168              :                      "created pattern stmt: %G", pattern_stmt);
    1169              :   return pattern_stmt;
    1170              : }
    1171              : 
    1172              : /* Function vect_recog_dot_prod_pattern
    1173              : 
    1174              :    Try to find the following pattern:
    1175              : 
    1176              :      type1a x_t
    1177              :      type1b y_t;
    1178              :      TYPE1 prod;
    1179              :      TYPE2 sum = init;
    1180              :    loop:
    1181              :      sum_0 = phi <init, sum_1>
    1182              :      S1  x_t = ...
    1183              :      S2  y_t = ...
    1184              :      S3  x_T = (TYPE1) x_t;
    1185              :      S4  y_T = (TYPE1) y_t;
    1186              :      S5  prod = x_T * y_T;
    1187              :      [S6  prod = (TYPE2) prod;  #optional]
    1188              :      S7  sum_1 = prod + sum_0;
    1189              : 
    1190              :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1191              :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1192              :    'type1a' and 'type1b' can differ.
    1193              : 
    1194              :    Input:
    1195              : 
    1196              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1197              :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1198              :    will be detected.
    1199              : 
    1200              :    Output:
    1201              : 
    1202              :    * TYPE_OUT: The type of the output  of this pattern.
    1203              : 
    1204              :    * Return value: A new stmt that will be used to replace the sequence of
    1205              :    stmts that constitute the pattern. In this case it will be:
    1206              :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1207              : 
    1208              :    Note: The dot-prod idiom is a widening reduction pattern that is
    1209              :          vectorized without preserving all the intermediate results. It
    1210              :          produces only N/2 (widened) results (by summing up pairs of
    1211              :          intermediate results) rather than all N results.  Therefore, we
    1212              :          cannot allow this pattern when we want to get all the results and in
    1213              :          the correct order (as is the case when this computation is in an
    1214              :          inner-loop nested in an outer-loop that us being vectorized).  */
    1215              : 
    1216              : static gimple *
    1217     30581200 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1218              :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1219              : {
    1220     30581200 :   tree oprnd0, oprnd1;
    1221     30581200 :   gimple *last_stmt = stmt_vinfo->stmt;
    1222     30581200 :   tree type, half_type;
    1223     30581200 :   gimple *pattern_stmt;
    1224     30581200 :   tree var;
    1225              : 
    1226              :   /* Look for the following pattern
    1227              :           DX = (TYPE1) X;
    1228              :           DY = (TYPE1) Y;
    1229              :           DPROD = DX * DY;
    1230              :           DDPROD = (TYPE2) DPROD;
    1231              :           sum_1 = DDPROD + sum_0;
    1232              :      In which
    1233              :      - DX is double the size of X
    1234              :      - DY is double the size of Y
    1235              :      - DX, DY, DPROD all have the same type but the sign
    1236              :        between X, Y and DPROD can differ.
    1237              :      - sum is the same size of DPROD or bigger
    1238              :      - sum has been recognized as a reduction variable.
    1239              : 
    1240              :      This is equivalent to:
    1241              :        DPROD = X w* Y;          #widen mult
    1242              :        sum_1 = DPROD w+ sum_0;  #widen summation
    1243              :      or
    1244              :        DPROD = X w* Y;          #widen mult
    1245              :        sum_1 = DPROD + sum_0;   #summation
    1246              :    */
    1247              : 
    1248              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1249              :      of the above pattern.  */
    1250              : 
    1251     30581200 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1252              :                                        &oprnd0, &oprnd1))
    1253              :     return NULL;
    1254              : 
    1255        53266 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1256              : 
    1257        53266 :   vect_unpromoted_value unprom_mult;
    1258        53266 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1259              : 
    1260              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1261              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1262              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1263              :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1264        53266 :   if (!oprnd0)
    1265              :     return NULL;
    1266              : 
    1267        36191 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1268        36191 :   if (!mult_vinfo)
    1269              :     return NULL;
    1270              : 
    1271              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1272              :      inside the loop (in case we are analyzing an outer-loop).  */
    1273       106029 :   vect_unpromoted_value unprom0[2];
    1274        35343 :   enum optab_subtype subtype = optab_vector;
    1275        35343 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1276              :                              false, 2, unprom0, &half_type, &subtype))
    1277              :     return NULL;
    1278              : 
    1279              :   /* If there are two widening operations, make sure they agree on the sign
    1280              :      of the extension.  The result of an optab_vector_mixed_sign operation
    1281              :      is signed; otherwise, the result has the same sign as the operands.  */
    1282         1347 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1283         2053 :       && (subtype == optab_vector_mixed_sign
    1284          706 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1285          503 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1286              :     return NULL;
    1287              : 
    1288         1266 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1289              : 
    1290              :   /* If the inputs have mixed signs, canonicalize on using the signed
    1291              :      input type for analysis.  This also helps when emulating mixed-sign
    1292              :      operations using signed operations.  */
    1293         1266 :   if (subtype == optab_vector_mixed_sign)
    1294          240 :     half_type = signed_type_for (half_type);
    1295              : 
    1296         1266 :   tree half_vectype;
    1297         1266 :   if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1298              :                                         type_out, &half_vectype, subtype))
    1299              :     {
    1300              :       /* We can emulate a mixed-sign dot-product using a sequence of
    1301              :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1302          583 :       if (subtype != optab_vector_mixed_sign
    1303          583 :           || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
    1304              :                                                DOT_PROD_EXPR, half_type,
    1305              :                                                type_out, &half_vectype,
    1306              :                                                optab_vector))
    1307          568 :         return NULL;
    1308              : 
    1309           15 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1310              :                                                *type_out);
    1311              :     }
    1312              : 
    1313              :   /* Get the inputs in the appropriate types.  */
    1314          698 :   tree mult_oprnd[2];
    1315          698 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1316              :                        unprom0, half_vectype, subtype);
    1317              : 
    1318          698 :   var = vect_recog_temp_ssa_var (type, NULL);
    1319          698 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1320              :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1321              : 
    1322          698 :   return pattern_stmt;
    1323              : }
    1324              : 
    1325              : 
    1326              : /* Function vect_recog_sad_pattern
    1327              : 
    1328              :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1329              : 
    1330              :      type x_t, y_t;
    1331              :      signed TYPE1 diff, abs_diff;
    1332              :      TYPE2 sum = init;
    1333              :    loop:
    1334              :      sum_0 = phi <init, sum_1>
    1335              :      S1  x_t = ...
    1336              :      S2  y_t = ...
    1337              :      S3  x_T = (TYPE1) x_t;
    1338              :      S4  y_T = (TYPE1) y_t;
    1339              :      S5  diff = x_T - y_T;
    1340              :      S6  abs_diff = ABS_EXPR <diff>;
    1341              :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1342              :      S8  sum_1 = abs_diff + sum_0;
    1343              : 
    1344              :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1345              :    same size of 'TYPE1' or bigger. This is a special case of a reduction
    1346              :    computation.
    1347              : 
    1348              :    Input:
    1349              : 
    1350              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1351              :    example, when this function is called with S8, the pattern
    1352              :    {S3,S4,S5,S6,S7,S8} will be detected.
    1353              : 
    1354              :    Output:
    1355              : 
    1356              :    * TYPE_OUT: The type of the output of this pattern.
    1357              : 
    1358              :    * Return value: A new stmt that will be used to replace the sequence of
    1359              :    stmts that constitute the pattern. In this case it will be:
    1360              :         SAD_EXPR <x_t, y_t, sum_0>
    1361              :   */
    1362              : 
    1363              : static gimple *
    1364     30580507 : vect_recog_sad_pattern (vec_info *vinfo,
    1365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1366              : {
    1367     30580507 :   gimple *last_stmt = stmt_vinfo->stmt;
    1368     30580507 :   tree half_type;
    1369              : 
    1370              :   /* Look for the following pattern
    1371              :           DX = (TYPE1) X;
    1372              :           DY = (TYPE1) Y;
    1373              :           DDIFF = DX - DY;
    1374              :           DAD = ABS_EXPR <DDIFF>;
    1375              :           DDPROD = (TYPE2) DPROD;
    1376              :           sum_1 = DAD + sum_0;
    1377              :      In which
    1378              :      - DX is at least double the size of X
    1379              :      - DY is at least double the size of Y
    1380              :      - DX, DY, DDIFF, DAD all have the same type
    1381              :      - sum is the same size of DAD or bigger
    1382              :      - sum has been recognized as a reduction variable.
    1383              : 
    1384              :      This is equivalent to:
    1385              :        DDIFF = X w- Y;          #widen sub
    1386              :        DAD = ABS_EXPR <DDIFF>;
    1387              :        sum_1 = DAD w+ sum_0;    #widen summation
    1388              :      or
    1389              :        DDIFF = X w- Y;          #widen sub
    1390              :        DAD = ABS_EXPR <DDIFF>;
    1391              :        sum_1 = DAD + sum_0;     #summation
    1392              :    */
    1393              : 
    1394              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1395              :      of the above pattern.  */
    1396              : 
    1397     30580507 :   tree plus_oprnd0, plus_oprnd1;
    1398     30580507 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1399              :                                        &plus_oprnd0, &plus_oprnd1))
    1400              :     return NULL;
    1401              : 
    1402        52568 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1403              : 
    1404              :   /* Any non-truncating sequence of conversions is OK here, since
    1405              :      with a successful match, the result of the ABS(U) is known to fit
    1406              :      within the nonnegative range of the result type.  (It cannot be the
    1407              :      negative of the minimum signed value due to the range of the widening
    1408              :      MINUS_EXPR.)  */
    1409        52568 :   vect_unpromoted_value unprom_abs;
    1410        52568 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1411              :                                                       &unprom_abs);
    1412              : 
    1413              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1414              :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1415              :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1416              :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1417              : 
    1418        52568 :   if (!plus_oprnd0)
    1419              :     return NULL;
    1420              : 
    1421        35493 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1422        35493 :   if (!abs_stmt_vinfo)
    1423              :     return NULL;
    1424              : 
    1425              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1426              :      inside the loop (in case we are analyzing an outer-loop).  */
    1427        34645 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1428       103935 :   vect_unpromoted_value unprom[2];
    1429              : 
    1430        34645 :   if (!abs_stmt)
    1431              :     {
    1432     30580375 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1433          280 :       if (!abd_stmt
    1434          280 :           || !gimple_call_internal_p (abd_stmt)
    1435            0 :           || gimple_call_num_args (abd_stmt) != 2)
    1436              :         return NULL;
    1437              : 
    1438            0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1439            0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1440              : 
    1441            0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
    1442            0 :           || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1443              :         {
    1444            0 :           unprom[0].op = abd_oprnd0;
    1445            0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1446            0 :           unprom[1].op = abd_oprnd1;
    1447            0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1448              :         }
    1449              :       else
    1450              :         return NULL;
    1451              : 
    1452            0 :       half_type = unprom[0].type;
    1453              :     }
    1454        34300 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1455              :                                             unprom, NULL))
    1456              :     return NULL;
    1457              : 
    1458          806 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1459              : 
    1460          806 :   tree half_vectype;
    1461          806 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1462              :                                         type_out, &half_vectype))
    1463              :     return NULL;
    1464              : 
    1465              :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1466          412 :   tree sad_oprnd[2];
    1467          412 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1468              :                        unprom, half_vectype);
    1469              : 
    1470          412 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1471          412 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1472              :                                               sad_oprnd[1], plus_oprnd1);
    1473              : 
    1474          412 :   return pattern_stmt;
    1475              : }
    1476              : 
    1477              : /* Function vect_recog_abd_pattern
    1478              : 
    1479              :    Try to find the following ABsolute Difference (ABD) or
    1480              :    widening ABD (WIDEN_ABD) pattern:
    1481              : 
    1482              :    TYPE1 x;
    1483              :    TYPE2 y;
    1484              :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1485              :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1486              :    TYPE3 diff = x_cast - y_cast;
    1487              :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1488              :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1489              : 
    1490              :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1491              :    twice as wide as TYPE3.
    1492              : 
    1493              :    Input:
    1494              : 
    1495              :    * STMT_VINFO: The stmt from which the pattern search begins
    1496              : 
    1497              :    Output:
    1498              : 
    1499              :    * TYPE_OUT: The type of the output of this pattern
    1500              : 
    1501              :    * Return value: A new stmt that will be used to replace the sequence of
    1502              :      stmts that constitute the pattern, principally:
    1503              :         out = IFN_ABD (x, y)
    1504              :         out = IFN_WIDEN_ABD (x, y)
    1505              :  */
    1506              : 
    1507              : static gimple *
    1508     30529669 : vect_recog_abd_pattern (vec_info *vinfo,
    1509              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1510              : {
    1511     51384207 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1512     20854538 :   if (!last_stmt)
    1513              :     return NULL;
    1514              : 
    1515     20854538 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1516              : 
    1517     62563614 :   vect_unpromoted_value unprom[2];
    1518     20854538 :   gassign *diff_stmt = NULL;
    1519     20854538 :   tree abd_in_type;
    1520     20854538 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
    1521              :                                        unprom, &diff_stmt))
    1522              :     {
    1523              :       /* We cannot try further without having a non-widening MINUS.  */
    1524     20852924 :       if (!diff_stmt)
    1525              :         return NULL;
    1526              : 
    1527          275 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1528          275 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1529          275 :       abd_in_type = signed_type_for (out_type);
    1530              :     }
    1531              : 
    1532         1889 :   tree abd_out_type = abd_in_type;
    1533              : 
    1534         1889 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1535         1889 :   if (!vectype_in)
    1536              :     return NULL;
    1537              : 
    1538         1872 :   internal_fn ifn = IFN_ABD;
    1539         1872 :   tree vectype_out = vectype_in;
    1540              : 
    1541         1872 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1542         1872 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1543              :     {
    1544         1505 :       tree mid_type
    1545         1505 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1546         1505 :                                           TYPE_UNSIGNED (abd_in_type));
    1547         1505 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1548              : 
    1549         1505 :       code_helper dummy_code;
    1550         1505 :       int dummy_int;
    1551         1505 :       auto_vec<tree> dummy_vec;
    1552         1505 :       if (mid_vectype
    1553         1505 :           && supportable_widening_operation (IFN_VEC_WIDEN_ABD,
    1554              :                                              mid_vectype, vectype_in, false,
    1555              :                                              &dummy_code, &dummy_code,
    1556              :                                              &dummy_int, &dummy_vec))
    1557              :         {
    1558            0 :           ifn = IFN_VEC_WIDEN_ABD;
    1559            0 :           abd_out_type = mid_type;
    1560            0 :           vectype_out = mid_vectype;
    1561              :         }
    1562         1505 :     }
    1563              : 
    1564         1505 :   if (ifn == IFN_ABD
    1565         1872 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1566              :                                           OPTIMIZE_FOR_SPEED))
    1567              :     return NULL;
    1568              : 
    1569            0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1570              : 
    1571            0 :   tree abd_oprnds[2];
    1572            0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1573              :                        abd_in_type, unprom, vectype_in);
    1574              : 
    1575            0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1576              : 
    1577            0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1578            0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1579              :                                                 abd_oprnds[0], abd_oprnds[1]);
    1580            0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1581            0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1582              : 
    1583            0 :   gimple *stmt = abd_stmt;
    1584            0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1585            0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1586            0 :       && !TYPE_UNSIGNED (abd_out_type))
    1587              :     {
    1588            0 :       tree unsign = unsigned_type_for (abd_out_type);
    1589            0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1590            0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1591              :     }
    1592              : 
    1593            0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1594              : }
    1595              : 
    1596              : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1597              :    so that it can be treated as though it had the form:
    1598              : 
    1599              :       A_TYPE a;
    1600              :       B_TYPE b;
    1601              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1602              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1603              :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1604              :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1605              :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1606              : 
    1607              :    Try to replace the pattern with:
    1608              : 
    1609              :       A_TYPE a;
    1610              :       B_TYPE b;
    1611              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1612              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1613              :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1614              :     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
    1615              : 
    1616              :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1617              : 
    1618              :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1619              :    name of the pattern being matched, for dump purposes.  */
    1620              : 
    1621              : static gimple *
    1622    122908319 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1623              :                              stmt_vec_info last_stmt_info, tree *type_out,
    1624              :                              tree_code orig_code, code_helper wide_code,
    1625              :                              bool shift_p, const char *name)
    1626              : {
    1627    122908319 :   gimple *last_stmt = last_stmt_info->stmt;
    1628              : 
    1629    368724957 :   vect_unpromoted_value unprom[2];
    1630    122908319 :   tree half_type;
    1631    122908319 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1632              :                              shift_p, 2, unprom, &half_type))
    1633              : 
    1634              :     return NULL;
    1635              : 
    1636              :   /* Pattern detected.  */
    1637       316802 :   vect_pattern_detected (name, last_stmt);
    1638              : 
    1639       316802 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1640       316802 :   tree itype = type;
    1641       316802 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1642       316802 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1643       222851 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1644       222851 :                                             TYPE_UNSIGNED (half_type));
    1645              : 
    1646              :   /* Check target support  */
    1647       316802 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1648       316802 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1649       316802 :   tree ctype = itype;
    1650       316802 :   tree vecctype = vecitype;
    1651       316802 :   if (orig_code == MINUS_EXPR
    1652         9474 :       && TYPE_UNSIGNED (itype)
    1653       321290 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1654              :     {
    1655              :       /* Subtraction is special, even if half_type is unsigned and no matter
    1656              :          whether type is signed or unsigned, if type is wider than itype,
    1657              :          we need to sign-extend from the widening operation result to the
    1658              :          result type.
    1659              :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1660              :          itype unsigned short and type either int or unsigned int.
    1661              :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1662              :          (unsigned short) 0xffff, but for type int we want the result -1
    1663              :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1664          694 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
    1665          694 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1666              :     }
    1667              : 
    1668       316802 :   code_helper dummy_code;
    1669       316802 :   int dummy_int;
    1670       316802 :   auto_vec<tree> dummy_vec;
    1671       316802 :   if (!vectype
    1672       316802 :       || !vecitype
    1673       246598 :       || !vecctype
    1674       563400 :       || !supportable_widening_operation (wide_code, vecitype, vectype, true,
    1675              :                                           &dummy_code, &dummy_code,
    1676              :                                           &dummy_int, &dummy_vec))
    1677       210486 :     return NULL;
    1678              : 
    1679       106316 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1680       106316 :   if (!*type_out)
    1681              :     return NULL;
    1682              : 
    1683       106316 :   tree oprnd[2];
    1684       106316 :   vect_convert_inputs (vinfo, last_stmt_info,
    1685              :                        2, oprnd, half_type, unprom, vectype);
    1686              : 
    1687       106316 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1688       106316 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1689              : 
    1690       106316 :   if (vecctype != vecitype)
    1691            0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1692              :                                         pattern_stmt, vecitype);
    1693              : 
    1694       106316 :   return vect_convert_output (vinfo, last_stmt_info,
    1695       106316 :                               type, pattern_stmt, vecctype);
    1696       316802 : }
    1697              : 
    1698              : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1699              :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    1700              : 
    1701              : static gimple *
    1702     30605834 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1703              :                                tree *type_out)
    1704              : {
    1705     30605834 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1706     30605834 :                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
    1707     30605834 :                                       "vect_recog_widen_mult_pattern");
    1708              : }
    1709              : 
    1710              : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1711              :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */
    1712              : 
    1713              : static gimple *
    1714     30860923 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1715              :                                tree *type_out)
    1716              : {
    1717     30860923 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1718     30860923 :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
    1719     30860923 :                                       false, "vect_recog_widen_plus_pattern");
    1720              : }
    1721              : 
    1722              : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1723              :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */
    1724              : static gimple *
    1725     30860923 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1726              :                                tree *type_out)
    1727              : {
    1728     30860923 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1729     30860923 :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
    1730     30860923 :                                       false, "vect_recog_widen_minus_pattern");
    1731              : }
    1732              : 
    1733              : /* Try to detect abd on widened inputs, converting IFN_ABD
    1734              :    to IFN_VEC_WIDEN_ABD.  */
    1735              : static gimple *
    1736     30860923 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1737              :                               tree *type_out)
    1738              : {
    1739     30860923 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1740     28562843 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1741              :     return NULL;
    1742              : 
    1743      2985912 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1744              : 
    1745      2985912 :   tree in_type = TREE_TYPE (last_rhs);
    1746      2985912 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1747      2985912 :   if (!INTEGRAL_TYPE_P (in_type)
    1748      2673579 :       || !INTEGRAL_TYPE_P (out_type)
    1749      2558591 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1750      3603867 :       || !TYPE_UNSIGNED (in_type))
    1751              :     return NULL;
    1752              : 
    1753       212539 :   vect_unpromoted_value unprom;
    1754       212539 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1755       212539 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1756              :     return NULL;
    1757              : 
    1758       210032 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1759       210032 :   if (!abd_pattern_vinfo)
    1760              :     return NULL;
    1761              : 
    1762     30870072 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1763         9149 :   if (!abd_stmt
    1764         9149 :       || !gimple_call_internal_p (abd_stmt)
    1765          253 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1766              :     return NULL;
    1767              : 
    1768            0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1769            0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
    1770              : 
    1771            0 :   code_helper dummy_code;
    1772            0 :   int dummy_int;
    1773            0 :   auto_vec<tree> dummy_vec;
    1774            0 :   if (!supportable_widening_operation (IFN_VEC_WIDEN_ABD, vectype_out,
    1775              :                                        vectype_in, false,
    1776              :                                        &dummy_code, &dummy_code,
    1777              :                                        &dummy_int, &dummy_vec))
    1778              :     return NULL;
    1779              : 
    1780            0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1781              : 
    1782            0 :   *type_out = vectype_out;
    1783              : 
    1784            0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1785            0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1786            0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1787            0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1788              :                                                       abd_oprnd0, abd_oprnd1);
    1789            0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1790            0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1791            0 :   return widen_abd_stmt;
    1792            0 : }
    1793              : 
    1794              : /* Function vect_recog_ctz_ffs_pattern
    1795              : 
    1796              :    Try to find the following pattern:
    1797              : 
    1798              :    TYPE1 A;
    1799              :    TYPE1 B;
    1800              : 
    1801              :    B = __builtin_ctz{,l,ll} (A);
    1802              : 
    1803              :    or
    1804              : 
    1805              :    B = __builtin_ffs{,l,ll} (A);
    1806              : 
    1807              :    Input:
    1808              : 
    1809              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1810              :    here it starts with B = __builtin_* (A);
    1811              : 
    1812              :    Output:
    1813              : 
    1814              :    * TYPE_OUT: The vector type of the output of this pattern.
    1815              : 
    1816              :    * Return value: A new stmt that will be used to replace the sequence of
    1817              :    stmts that constitute the pattern, using clz or popcount builtins.  */
    1818              : 
    1819              : static gimple *
    1820     30580446 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1821              :                             tree *type_out)
    1822              : {
    1823     30580446 :   gimple *call_stmt = stmt_vinfo->stmt;
    1824     30580446 :   gimple *pattern_stmt;
    1825     30580446 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1826     30580446 :   tree new_var;
    1827     30580446 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
    1828     30580446 :   bool defined_at_zero = true, defined_at_zero_new = false;
    1829     30580446 :   int val = 0, val_new = 0, val_cmp = 0;
    1830     30580446 :   int prec;
    1831     30580446 :   int sub = 0, add = 0;
    1832     30580446 :   location_t loc;
    1833              : 
    1834     30580446 :   if (!is_gimple_call (call_stmt))
    1835              :     return NULL;
    1836              : 
    1837      3566411 :   if (gimple_call_num_args (call_stmt) != 1
    1838      3566411 :       && gimple_call_num_args (call_stmt) != 2)
    1839              :     return NULL;
    1840              : 
    1841      1999643 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1842      1999643 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1843      1999643 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1844      1999643 :   if (!lhs_oprnd)
    1845              :     return NULL;
    1846       971674 :   lhs_type = TREE_TYPE (lhs_oprnd);
    1847       971674 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1848       326583 :       || !INTEGRAL_TYPE_P (rhs_type)
    1849        45086 :       || !type_has_mode_precision_p (rhs_type)
    1850      1015174 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1851       939832 :     return NULL;
    1852              : 
    1853        31842 :   switch (gimple_call_combined_fn (call_stmt))
    1854              :     {
    1855         1529 :     CASE_CFN_CTZ:
    1856         1529 :       ifn = IFN_CTZ;
    1857         1529 :       if (!gimple_call_internal_p (call_stmt)
    1858         1529 :           || gimple_call_num_args (call_stmt) != 2)
    1859              :         defined_at_zero = false;
    1860              :       else
    1861          121 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1862              :       break;
    1863              :     CASE_CFN_FFS:
    1864              :       ifn = IFN_FFS;
    1865              :       break;
    1866              :     default:
    1867              :       return NULL;
    1868              :     }
    1869              : 
    1870         1764 :   prec = TYPE_PRECISION (rhs_type);
    1871         1764 :   loc = gimple_location (call_stmt);
    1872              : 
    1873         1764 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1874         1764 :   if (!vec_type)
    1875              :     return NULL;
    1876              : 
    1877         1758 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1878         1758 :   if (!vec_rhs_type)
    1879              :     return NULL;
    1880              : 
    1881              :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1882              :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1883              :      popcount<vector_mode>2.  */
    1884         1521 :   if (!vec_type
    1885         1521 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1886              :                                          OPTIMIZE_FOR_SPEED))
    1887              :     return NULL;
    1888              : 
    1889         1521 :   if (ifn == IFN_FFS
    1890         1521 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1891              :                                          OPTIMIZE_FOR_SPEED))
    1892              :     {
    1893            0 :       ifnnew = IFN_CTZ;
    1894            0 :       defined_at_zero_new
    1895            0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1896              :                                      val_new) == 2;
    1897              :     }
    1898         1521 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1899              :                                            OPTIMIZE_FOR_SPEED))
    1900              :     {
    1901          160 :       ifnnew = IFN_CLZ;
    1902          160 :       defined_at_zero_new
    1903          160 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1904              :                                      val_new) == 2;
    1905              :     }
    1906          160 :   if ((ifnnew == IFN_LAST
    1907          160 :        || (defined_at_zero && !defined_at_zero_new))
    1908         1361 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1909              :                                          OPTIMIZE_FOR_SPEED))
    1910              :     {
    1911              :       ifnnew = IFN_POPCOUNT;
    1912              :       defined_at_zero_new = true;
    1913              :       val_new = prec;
    1914              :     }
    1915         1395 :   if (ifnnew == IFN_LAST)
    1916              :     return NULL;
    1917              : 
    1918          286 :   vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
    1919              : 
    1920          286 :   val_cmp = val_new;
    1921          286 :   if ((ifnnew == IFN_CLZ
    1922          286 :        && defined_at_zero
    1923          106 :        && defined_at_zero_new
    1924          106 :        && val == prec
    1925           54 :        && val_new == prec)
    1926          232 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1927              :     {
    1928          137 :       if (vect_is_reduction (stmt_vinfo))
    1929              :         return NULL;
    1930              : 
    1931              :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1932              :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1933          137 :       if (ifnnew == IFN_CLZ)
    1934           54 :         sub = prec;
    1935          137 :       val_cmp = prec;
    1936              : 
    1937          137 :       if (!TYPE_UNSIGNED (rhs_type))
    1938              :         {
    1939           12 :           rhs_type = unsigned_type_for (rhs_type);
    1940           12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1941           12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1942           12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1943           12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1944              :                                   vec_rhs_type);
    1945           12 :           rhs_oprnd = new_var;
    1946              :         }
    1947              : 
    1948          137 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
    1949          137 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1950              :                                           build_int_cst (rhs_type, -1));
    1951          137 :       gimple_set_location (pattern_stmt, loc);
    1952          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1953              : 
    1954          137 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1955          137 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1956          137 :       gimple_set_location (pattern_stmt, loc);
    1957          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1958          137 :       rhs_oprnd = new_var;
    1959              : 
    1960          137 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1961          137 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1962              :                                           m1, rhs_oprnd);
    1963          137 :       gimple_set_location (pattern_stmt, loc);
    1964          137 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1965          137 :       rhs_oprnd = new_var;
    1966          137 :     }
    1967          149 :   else if (ifnnew == IFN_CLZ)
    1968              :     {
    1969          106 :       if (vect_is_reduction (stmt_vinfo))
    1970              :         return NULL;
    1971              : 
    1972              :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1973              :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1974          106 :       sub = prec - (ifn == IFN_CTZ);
    1975          106 :       val_cmp = sub - val_new;
    1976              : 
    1977          106 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1978          106 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1979          106 :       gimple_set_location (pattern_stmt, loc);
    1980          106 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1981              : 
    1982          106 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1983          106 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1984              :                                           rhs_oprnd, neg);
    1985          106 :       gimple_set_location (pattern_stmt, loc);
    1986          106 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1987          106 :       rhs_oprnd = new_var;
    1988              :     }
    1989           43 :   else if (ifnnew == IFN_POPCOUNT)
    1990              :     {
    1991           43 :       if (vect_is_reduction (stmt_vinfo))
    1992              :         return NULL;
    1993              : 
    1994              :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1995              :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
    1996           43 :       sub = prec + (ifn == IFN_FFS);
    1997           43 :       val_cmp = sub;
    1998              : 
    1999           43 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    2000           43 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    2001           43 :       gimple_set_location (pattern_stmt, loc);
    2002           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2003              : 
    2004           43 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    2005           43 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    2006              :                                           rhs_oprnd, neg);
    2007           43 :       gimple_set_location (pattern_stmt, loc);
    2008           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2009           43 :       rhs_oprnd = new_var;
    2010              :     }
    2011            0 :   else if (ifnnew == IFN_CTZ)
    2012              :     {
    2013              :       /* .FFS (X) = .CTZ (X) + 1.  */
    2014            0 :       add = 1;
    2015            0 :       val_cmp++;
    2016              : 
    2017            0 :       if (vect_is_reduction (stmt_vinfo)
    2018            0 :           && defined_at_zero
    2019            0 :           && (!defined_at_zero_new || val != val_cmp))
    2020              :         return NULL;
    2021              :     }
    2022              : 
    2023              :   /* Create B = .IFNNEW (A).  */
    2024          286 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2025          286 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
    2026          160 :     pattern_stmt
    2027          160 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    2028              :                                     build_int_cst (integer_type_node,
    2029          160 :                                                    val_new));
    2030              :   else
    2031          126 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    2032          286 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2033          286 :   gimple_set_location (pattern_stmt, loc);
    2034          286 :   *type_out = vec_type;
    2035              : 
    2036          286 :   if (sub)
    2037              :     {
    2038          203 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2039          203 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2040          203 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2041          203 :                                           build_int_cst (lhs_type, sub),
    2042              :                                           new_var);
    2043          203 :       gimple_set_location (pattern_stmt, loc);
    2044          203 :       new_var = ret_var;
    2045              :     }
    2046           83 :   else if (add)
    2047              :     {
    2048            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2049            0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2050            0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2051            0 :                                           build_int_cst (lhs_type, add));
    2052            0 :       gimple_set_location (pattern_stmt, loc);
    2053            0 :       new_var = ret_var;
    2054              :     }
    2055              : 
    2056          286 :   if (defined_at_zero
    2057          210 :       && (!defined_at_zero_new || val != val_cmp))
    2058              :     {
    2059           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2060           43 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2061           43 :       rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2062           43 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2063           43 :       tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2064           43 :       pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
    2065              :                                           build_zero_cst (rhs_type));
    2066           43 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    2067              :                               truth_type_for (vec_type), rhs_type);
    2068           43 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2069              :                                           new_var,
    2070           43 :                                           build_int_cst (lhs_type, val));
    2071              :     }
    2072              : 
    2073          286 :   if (dump_enabled_p ())
    2074           36 :     dump_printf_loc (MSG_NOTE, vect_location,
    2075              :                      "created pattern stmt: %G", pattern_stmt);
    2076              : 
    2077              :   return pattern_stmt;
    2078              : }
    2079              : 
    2080              : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2081              : 
    2082              :    Try to find the following pattern:
    2083              : 
    2084              :    UTYPE1 A;
    2085              :    TYPE1 B;
    2086              :    UTYPE2 temp_in;
    2087              :    TYPE3 temp_out;
    2088              :    temp_in = (UTYPE2)A;
    2089              : 
    2090              :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2091              :    B = (TYPE1) temp_out;
    2092              : 
    2093              :    TYPE2 may or may not be equal to TYPE3.
    2094              :    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
    2095              :    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
    2096              : 
    2097              :    Input:
    2098              : 
    2099              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2100              :    here it starts with B = (TYPE1) temp_out;
    2101              : 
    2102              :    Output:
    2103              : 
    2104              :    * TYPE_OUT: The vector type of the output of this pattern.
    2105              : 
    2106              :    * Return value: A new stmt that will be used to replace the sequence of
    2107              :    stmts that constitute the pattern. In this case it will be:
    2108              :    B = .POPCOUNT (A);
    2109              : 
    2110              :    Similarly for clz, ctz and ffs.
    2111              : */
    2112              : 
    2113              : static gimple *
    2114     30580080 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2115              :                                          stmt_vec_info stmt_vinfo,
    2116              :                                          tree *type_out)
    2117              : {
    2118     30580080 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2119     20904665 :   gimple *call_stmt, *pattern_stmt;
    2120     20904665 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2121     51484427 :   internal_fn ifn = IFN_LAST;
    2122     30579762 :   int addend = 0;
    2123              : 
    2124              :   /* Find B = (TYPE1) temp_out. */
    2125     20904665 :   if (!last_stmt)
    2126              :     return NULL;
    2127     20904665 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2128     20904665 :   if (!CONVERT_EXPR_CODE_P (code))
    2129              :     return NULL;
    2130              : 
    2131      2856548 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2132      2856548 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2133      2856548 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2134              :     return NULL;
    2135              : 
    2136      2675242 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2137      2675242 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2138      2675242 :       || !has_single_use (rhs_oprnd))
    2139              :     return NULL;
    2140      1368816 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2141              : 
    2142              :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
    2143      1368816 :   if (!is_gimple_call (call_stmt))
    2144              :     return NULL;
    2145       100791 :   switch (gimple_call_combined_fn (call_stmt))
    2146              :     {
    2147              :       int val;
    2148              :     CASE_CFN_POPCOUNT:
    2149              :       ifn = IFN_POPCOUNT;
    2150              :       break;
    2151         2115 :     CASE_CFN_CLZ:
    2152         2115 :       ifn = IFN_CLZ;
    2153              :       /* Punt if call result is unsigned and defined value at zero
    2154              :          is negative, as the negative value doesn't extend correctly.  */
    2155         2115 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2156            0 :           && gimple_call_internal_p (call_stmt)
    2157         2115 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2158              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2159         2115 :           && val < 0)
    2160              :         return NULL;
    2161              :       break;
    2162          691 :     CASE_CFN_CTZ:
    2163          691 :       ifn = IFN_CTZ;
    2164              :       /* Punt if call result is unsigned and defined value at zero
    2165              :          is negative, as the negative value doesn't extend correctly.  */
    2166          691 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2167            0 :           && gimple_call_internal_p (call_stmt)
    2168          691 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2169              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2170          691 :           && val < 0)
    2171              :         return NULL;
    2172              :       break;
    2173           57 :     CASE_CFN_FFS:
    2174           57 :       ifn = IFN_FFS;
    2175           57 :       break;
    2176              :     default:
    2177              :       return NULL;
    2178              :     }
    2179              : 
    2180         3165 :   if (gimple_call_num_args (call_stmt) != 1
    2181         3165 :       && gimple_call_num_args (call_stmt) != 2)
    2182              :     return NULL;
    2183              : 
    2184         3165 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2185         3165 :   vect_unpromoted_value unprom_diff;
    2186         3165 :   rhs_origin
    2187         3165 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2188              : 
    2189         3165 :   if (!rhs_origin)
    2190              :     return NULL;
    2191              : 
    2192              :   /* Input and output of .POPCOUNT should be same-precision integer.  */
    2193         3155 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2194              :     return NULL;
    2195              : 
    2196              :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2197              :      different builtins/internal functions have different behaviors.  */
    2198         1598 :   if (TYPE_PRECISION (unprom_diff.type)
    2199         1598 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2200          264 :     switch (ifn)
    2201              :       {
    2202           95 :       case IFN_POPCOUNT:
    2203              :         /* For popcount require zero extension, which doesn't add any
    2204              :            further bits to the count.  */
    2205           95 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2206              :           return NULL;
    2207              :         break;
    2208          109 :       case IFN_CLZ:
    2209              :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2210              :            if it is undefined at zero or if it matches also for the
    2211              :            defined value there.  */
    2212          109 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2213              :           return NULL;
    2214          109 :         if (!type_has_mode_precision_p (lhs_type)
    2215          109 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2216            0 :           return NULL;
    2217          109 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2218          109 :                   - TYPE_PRECISION (lhs_type));
    2219          109 :         if (gimple_call_internal_p (call_stmt)
    2220          109 :             && gimple_call_num_args (call_stmt) == 2)
    2221              :           {
    2222            0 :             int val1, val2;
    2223            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2224            0 :             int d2
    2225            0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2226              :                                            val2);
    2227            0 :             if (d2 != 2 || val1 != val2 + addend)
    2228              :               return NULL;
    2229              :           }
    2230              :         break;
    2231           40 :       case IFN_CTZ:
    2232              :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2233              :            if it is undefined at zero or if it matches also for the
    2234              :            defined value there.  */
    2235           40 :         if (gimple_call_internal_p (call_stmt)
    2236           40 :             && gimple_call_num_args (call_stmt) == 2)
    2237              :           {
    2238            0 :             int val1, val2;
    2239            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2240            0 :             int d2
    2241            0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2242              :                                            val2);
    2243            0 :             if (d2 != 2 || val1 != val2)
    2244              :               return NULL;
    2245              :           }
    2246              :         break;
    2247              :       case IFN_FFS:
    2248              :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2249              :         break;
    2250            0 :       default:
    2251            0 :         gcc_unreachable ();
    2252              :       }
    2253              : 
    2254         1598 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2255              :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2256         1598 :   if (!vec_type)
    2257              :     return NULL;
    2258              : 
    2259         1473 :   bool supported
    2260         1473 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2261         1473 :   if (!supported)
    2262         1296 :     switch (ifn)
    2263              :       {
    2264              :       case IFN_POPCOUNT:
    2265              :       case IFN_CLZ:
    2266              :         return NULL;
    2267           57 :       case IFN_FFS:
    2268              :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2269           57 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2270              :                                             OPTIMIZE_FOR_SPEED))
    2271              :           break;
    2272              :         /* FALLTHRU */
    2273          504 :       case IFN_CTZ:
    2274              :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2275              :            clz or popcount.  */
    2276          504 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2277              :                                             OPTIMIZE_FOR_SPEED))
    2278              :           break;
    2279          444 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2280              :                                             OPTIMIZE_FOR_SPEED))
    2281              :           break;
    2282              :         return NULL;
    2283            0 :       default:
    2284            0 :         gcc_unreachable ();
    2285              :       }
    2286              : 
    2287          318 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2288              :                          call_stmt);
    2289              : 
    2290              :   /* Create B = .POPCOUNT (A).  */
    2291          318 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2292          318 :   tree arg2 = NULL_TREE;
    2293          318 :   int val;
    2294          318 :   if (ifn == IFN_CLZ
    2295          368 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2296              :                                     val) == 2)
    2297           48 :     arg2 = build_int_cst (integer_type_node, val);
    2298          270 :   else if (ifn == IFN_CTZ
    2299          363 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2300              :                                          val) == 2)
    2301           93 :     arg2 = build_int_cst (integer_type_node, val);
    2302          318 :   if (arg2)
    2303          141 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2304              :   else
    2305          177 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2306          318 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2307          318 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2308          318 :   *type_out = vec_type;
    2309              : 
    2310          318 :   if (dump_enabled_p ())
    2311           24 :     dump_printf_loc (MSG_NOTE, vect_location,
    2312              :                      "created pattern stmt: %G", pattern_stmt);
    2313              : 
    2314          318 :   if (addend)
    2315              :     {
    2316           12 :       gcc_assert (supported);
    2317           12 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2318           12 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2319           12 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2320           12 :                                           build_int_cst (lhs_type, addend));
    2321              :     }
    2322          306 :   else if (!supported)
    2323              :     {
    2324          141 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2325          141 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2326          141 :       pattern_stmt
    2327          141 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2328          141 :       if (pattern_stmt == NULL)
    2329              :         return NULL;
    2330          141 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2331              :         {
    2332          141 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2333          141 :           gimple_seq_add_seq_without_update (pseq, seq);
    2334              :         }
    2335              :     }
    2336              :   return pattern_stmt;
    2337              : }
    2338              : 
    2339              : /* Function vect_recog_pow_pattern
    2340              : 
    2341              :    Try to find the following pattern:
    2342              : 
    2343              :      x = POW (y, N);
    2344              : 
    2345              :    with POW being one of pow, powf, powi, powif and N being
    2346              :    either 2 or 0.5.
    2347              : 
    2348              :    Input:
    2349              : 
    2350              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2351              : 
    2352              :    Output:
    2353              : 
    2354              :    * TYPE_OUT: The type of the output of this pattern.
    2355              : 
    2356              :    * Return value: A new stmt that will be used to replace the sequence of
    2357              :    stmts that constitute the pattern. In this case it will be:
    2358              :         x = x * x
    2359              :    or
    2360              :         x = sqrt (x)
    2361              : */
    2362              : 
    2363              : static gimple *
    2364     30580095 : vect_recog_pow_pattern (vec_info *vinfo,
    2365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2366              : {
    2367     30580095 :   gimple *last_stmt = stmt_vinfo->stmt;
    2368     30580095 :   tree base, exp;
    2369     30580095 :   gimple *stmt;
    2370     30580095 :   tree var;
    2371              : 
    2372     30580095 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2373              :     return NULL;
    2374              : 
    2375      1495268 :   switch (gimple_call_combined_fn (last_stmt))
    2376              :     {
    2377          276 :     CASE_CFN_POW:
    2378          276 :     CASE_CFN_POWI:
    2379          276 :       break;
    2380              : 
    2381              :     default:
    2382              :       return NULL;
    2383              :     }
    2384              : 
    2385          276 :   base = gimple_call_arg (last_stmt, 0);
    2386          276 :   exp = gimple_call_arg (last_stmt, 1);
    2387          276 :   if (TREE_CODE (exp) != REAL_CST
    2388          249 :       && TREE_CODE (exp) != INTEGER_CST)
    2389              :     {
    2390          249 :       if (flag_unsafe_math_optimizations
    2391           37 :           && TREE_CODE (base) == REAL_CST
    2392          252 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2393              :         {
    2394            3 :           combined_fn log_cfn;
    2395            3 :           built_in_function exp_bfn;
    2396            3 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2397              :             {
    2398              :             case BUILT_IN_POW:
    2399              :               log_cfn = CFN_BUILT_IN_LOG;
    2400              :               exp_bfn = BUILT_IN_EXP;
    2401              :               break;
    2402            0 :             case BUILT_IN_POWF:
    2403            0 :               log_cfn = CFN_BUILT_IN_LOGF;
    2404            0 :               exp_bfn = BUILT_IN_EXPF;
    2405            0 :               break;
    2406            0 :             case BUILT_IN_POWL:
    2407            0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2408            0 :               exp_bfn = BUILT_IN_EXPL;
    2409            0 :               break;
    2410              :             default:
    2411              :               return NULL;
    2412              :             }
    2413            3 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2414            3 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2415              :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2416              :              does that, but if C is a power of 2, we want to use
    2417              :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2418              :              vectorization we don't have vectorized exp2.  */
    2419            3 :           if (logc
    2420            3 :               && TREE_CODE (logc) == REAL_CST
    2421            3 :               && exp_decl
    2422            6 :               && lookup_attribute ("omp declare simd",
    2423            3 :                                    DECL_ATTRIBUTES (exp_decl)))
    2424              :             {
    2425            3 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2426            3 :               if (node->simd_clones == NULL)
    2427              :                 {
    2428            2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2429            2 :                       || node->definition)
    2430              :                     return NULL;
    2431            2 :                   expand_simd_clones (node);
    2432            2 :                   if (node->simd_clones == NULL)
    2433              :                     return NULL;
    2434              :                 }
    2435            3 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2436            3 :               if (!*type_out)
    2437              :                 return NULL;
    2438            3 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2439            3 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
    2440            3 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2441            3 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2442            3 :               g = gimple_build_call (exp_decl, 1, def);
    2443            3 :               gimple_call_set_lhs (g, res);
    2444            3 :               return g;
    2445              :             }
    2446              :         }
    2447              : 
    2448          246 :       return NULL;
    2449              :     }
    2450              : 
    2451              :   /* We now have a pow or powi builtin function call with a constant
    2452              :      exponent.  */
    2453              : 
    2454              :   /* Catch squaring.  */
    2455           27 :   if ((tree_fits_shwi_p (exp)
    2456            0 :        && tree_to_shwi (exp) == 2)
    2457           27 :       || (TREE_CODE (exp) == REAL_CST
    2458           27 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2459              :     {
    2460            7 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2461            7 :                                             TREE_TYPE (base), type_out))
    2462              :         return NULL;
    2463              : 
    2464            7 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2465            7 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2466            7 :       return stmt;
    2467              :     }
    2468              : 
    2469              :   /* Catch square root.  */
    2470           20 :   if (TREE_CODE (exp) == REAL_CST
    2471           20 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2472              :     {
    2473           10 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2474           10 :       if (*type_out
    2475           10 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2476              :                                              OPTIMIZE_FOR_SPEED))
    2477              :         {
    2478            8 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2479            8 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2480            8 :           gimple_call_set_lhs (stmt, var);
    2481            8 :           gimple_call_set_nothrow (stmt, true);
    2482            8 :           return stmt;
    2483              :         }
    2484              :     }
    2485              : 
    2486              :   return NULL;
    2487              : }
    2488              : 
    2489              : 
    2490              : /* Function vect_recog_widen_sum_pattern
    2491              : 
    2492              :    Try to find the following pattern:
    2493              : 
    2494              :      type x_t;
    2495              :      TYPE x_T, sum = init;
    2496              :    loop:
    2497              :      sum_0 = phi <init, sum_1>
    2498              :      S1  x_t = *p;
    2499              :      S2  x_T = (TYPE) x_t;
    2500              :      S3  sum_1 = x_T + sum_0;
    2501              : 
    2502              :    where type 'TYPE' is at least double the size of type 'type', i.e - we're
    2503              :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2504              :    a special case of a reduction computation.
    2505              : 
    2506              :    Input:
    2507              : 
    2508              :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2509              :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2510              : 
    2511              :    Output:
    2512              : 
    2513              :    * TYPE_OUT: The type of the output of this pattern.
    2514              : 
    2515              :    * Return value: A new stmt that will be used to replace the sequence of
    2516              :    stmts that constitute the pattern. In this case it will be:
    2517              :         WIDEN_SUM <x_t, sum_0>
    2518              : 
    2519              :    Note: The widening-sum idiom is a widening reduction pattern that is
    2520              :          vectorized without preserving all the intermediate results. It
    2521              :          produces only N/2 (widened) results (by summing up pairs of
    2522              :          intermediate results) rather than all N results.  Therefore, we
    2523              :          cannot allow this pattern when we want to get all the results and in
    2524              :          the correct order (as is the case when this computation is in an
    2525              :          inner-loop nested in an outer-loop that us being vectorized).  */
    2526              : 
    2527              : static gimple *
    2528     30580095 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2529              :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2530              : {
    2531     30580095 :   gimple *last_stmt = stmt_vinfo->stmt;
    2532     30580095 :   tree oprnd0, oprnd1;
    2533     30580095 :   tree type;
    2534     30580095 :   gimple *pattern_stmt;
    2535     30580095 :   tree var;
    2536              : 
    2537              :   /* Look for the following pattern
    2538              :           DX = (TYPE) X;
    2539              :           sum_1 = DX + sum_0;
    2540              :      In which DX is at least double the size of X, and sum_1 has been
    2541              :      recognized as a reduction variable.
    2542              :    */
    2543              : 
    2544              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2545              :      of the above pattern.  */
    2546              : 
    2547     30580095 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2548              :                                        &oprnd0, &oprnd1)
    2549        52156 :       || TREE_CODE (oprnd0) != SSA_NAME
    2550     30631982 :       || !vinfo->lookup_def (oprnd0))
    2551     30528273 :     return NULL;
    2552              : 
    2553        51822 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2554              : 
    2555              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2556              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2557              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2558              :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    2559              :      'TYPE'.  */
    2560              : 
    2561        51822 :   vect_unpromoted_value unprom0;
    2562        51822 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2563        51822 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    2564              :     return NULL;
    2565              : 
    2566         2275 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2567              : 
    2568         2275 :   if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2569              :                                       unprom0.type, type_out))
    2570              :     return NULL;
    2571              : 
    2572            0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2573            0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    2574              : 
    2575            0 :   return pattern_stmt;
    2576              : }
    2577              : 
    2578              : /* Function vect_recog_bitfield_ref_pattern
    2579              : 
    2580              :    Try to find the following pattern:
    2581              : 
    2582              :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2583              :    result = (type_out) bf_value;
    2584              : 
    2585              :    or
    2586              : 
    2587              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2588              : 
    2589              :    where type_out is a non-bitfield type, that is to say, it's precision matches
    2590              :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2591              : 
    2592              :    Input:
    2593              : 
    2594              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2595              :    here it starts with:
    2596              :    result = (type_out) bf_value;
    2597              : 
    2598              :    or
    2599              : 
    2600              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2601              : 
    2602              :    Output:
    2603              : 
    2604              :    * TYPE_OUT: The vector type of the output of this pattern.
    2605              : 
    2606              :    * Return value: A new stmt that will be used to replace the sequence of
    2607              :    stmts that constitute the pattern. If the precision of type_out is bigger
    2608              :    than the precision type of _1 we perform the widening before the shifting,
    2609              :    since the new precision will be large enough to shift the value and moving
    2610              :    widening operations up the statement chain enables the generation of
    2611              :    widening loads.  If we are widening and the operation after the pattern is
    2612              :    an addition then we mask first and shift later, to enable the generation of
    2613              :    shifting adds.  In the case of narrowing we will always mask first, shift
    2614              :    last and then perform a narrowing operation.  This will enable the
    2615              :    generation of narrowing shifts.
    2616              : 
    2617              :    Widening with mask first, shift later:
    2618              :    container = (type_out) container;
    2619              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2620              :    result = masked >> bitpos;
    2621              : 
    2622              :    Widening with shift first, mask last:
    2623              :    container = (type_out) container;
    2624              :    shifted = container >> bitpos;
    2625              :    result = shifted & ((1 << bitsize) - 1);
    2626              : 
    2627              :    Narrowing:
    2628              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2629              :    result = masked >> bitpos;
    2630              :    result = (type_out) result;
    2631              : 
    2632              :    If the bitfield is signed and it's wider than type_out, we need to
    2633              :    keep the result sign-extended:
    2634              :    container = (type) container;
    2635              :    masked = container << (prec - bitsize - bitpos);
    2636              :    result = (type_out) (masked >> (prec - bitsize));
    2637              : 
    2638              :    Here type is the signed variant of the wider of type_out and the type
    2639              :    of container.
    2640              : 
    2641              :    The shifting is always optional depending on whether bitpos != 0.
    2642              : 
    2643              :    When the original bitfield was inside a gcond then an new gcond is also
    2644              :    generated with the newly `result` as the operand to the comparison.
    2645              : 
    2646              : */
    2647              : 
    2648              : static gimple *
    2649     30526219 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2650              :                                  tree *type_out)
    2651              : {
    2652     30526219 :   gimple *bf_stmt = NULL;
    2653     30526219 :   tree lhs = NULL_TREE;
    2654     30526219 :   tree ret_type = NULL_TREE;
    2655     30526219 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2656     30526219 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2657              :     {
    2658      5130997 :       tree op = gimple_cond_lhs (cond_stmt);
    2659      5130997 :       if (TREE_CODE (op) != SSA_NAME)
    2660              :         return NULL;
    2661      5130690 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2662      5130690 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2663              :         return NULL;
    2664              :     }
    2665     25395222 :   else if (is_gimple_assign (stmt)
    2666     20850485 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2667     28176346 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2668              :     {
    2669      2740535 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2670      2740535 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2671      2740535 :       lhs = gimple_assign_lhs (stmt);
    2672      2740535 :       ret_type = TREE_TYPE (lhs);
    2673              :     }
    2674              : 
    2675      6059175 :   if (!bf_stmt
    2676      6059175 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2677              :     return NULL;
    2678              : 
    2679        15274 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2680        15274 :   tree container = TREE_OPERAND (bf_ref, 0);
    2681        15274 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);
    2682              : 
    2683        15274 :   if (!bit_field_offset (bf_ref).is_constant ()
    2684        15274 :       || !bit_field_size (bf_ref).is_constant ()
    2685        15274 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2686              :     return NULL;
    2687              : 
    2688        30170 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2689        15272 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2690        17431 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2691        13117 :     return NULL;
    2692              : 
    2693         2157 :   gimple *use_stmt, *pattern_stmt;
    2694         2157 :   use_operand_p use_p;
    2695         2157 :   bool shift_first = true;
    2696         2157 :   tree container_type = TREE_TYPE (container);
    2697         2157 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2698              : 
    2699              :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2700              :      the container may change and we have to consider offset change for
    2701              :      widening loads on big endianness.  The shift_n calculated here can be
    2702              :      independent of widening.  */
    2703         2157 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2704         2157 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2705         2157 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2706         2157 :   if (BYTES_BIG_ENDIAN)
    2707              :     shift_n = prec - shift_n - mask_width;
    2708              : 
    2709         2157 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2710         1394 :                    TYPE_PRECISION (ret_type) > mask_width);
    2711         2157 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2712         2157 :                      TYPE_PRECISION (ret_type));
    2713              : 
    2714              :   /* We move the conversion earlier if the loaded type is smaller than the
    2715              :      return type to enable the use of widening loads.  And if we need a
    2716              :      sign extension, we need to convert the loaded value early to a signed
    2717              :      type as well.  */
    2718         2157 :   if (ref_sext || load_widen)
    2719              :     {
    2720          941 :       tree type = load_widen ? ret_type : container_type;
    2721          941 :       if (ref_sext)
    2722          902 :         type = gimple_signed_type (type);
    2723          941 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2724              :                                           NOP_EXPR, container);
    2725          941 :       container = gimple_get_lhs (pattern_stmt);
    2726          941 :       container_type = TREE_TYPE (container);
    2727          941 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2728          941 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2729          941 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2730              :     }
    2731         1216 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2732              :     /* If we are doing the conversion last then also delay the shift as we may
    2733              :        be able to combine the shift and conversion in certain cases.  */
    2734              :     shift_first = false;
    2735              : 
    2736              :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2737              :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2738              :      add into a single instruction.  */
    2739         1416 :   if (lhs && !is_pattern_stmt_p (stmt_info)
    2740         3573 :       && single_imm_use (lhs, &use_p, &use_stmt))
    2741              :     {
    2742         1049 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2743         1049 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2744              :         shift_first = false;
    2745              :     }
    2746              : 
    2747              :   /* If we don't have to shift we only generate the mask, so just fix the
    2748              :      code-path to shift_first.  */
    2749         2157 :   if (shift_n == 0)
    2750          756 :     shift_first = true;
    2751              : 
    2752         2157 :   tree result;
    2753         2157 :   if (shift_first && !ref_sext)
    2754              :     {
    2755          503 :       tree shifted = container;
    2756          503 :       if (shift_n)
    2757              :         {
    2758           59 :           pattern_stmt
    2759           59 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2760              :                                    RSHIFT_EXPR, container,
    2761           59 :                                    build_int_cst (sizetype, shift_n));
    2762           59 :           shifted = gimple_assign_lhs (pattern_stmt);
    2763           59 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2764              :         }
    2765              : 
    2766          503 :       tree mask = wide_int_to_tree (container_type,
    2767          503 :                                     wi::mask (mask_width, false, prec));
    2768              : 
    2769          503 :       pattern_stmt
    2770          503 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2771              :                                BIT_AND_EXPR, shifted, mask);
    2772          503 :       result = gimple_assign_lhs (pattern_stmt);
    2773              :     }
    2774              :   else
    2775              :     {
    2776         1654 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2777         1654 :       if (!ref_sext)
    2778              :         {
    2779          752 :           tree mask = wide_int_to_tree (container_type,
    2780          752 :                                         wi::shifted_mask (shift_n,
    2781              :                                                           mask_width,
    2782              :                                                           false, prec));
    2783          752 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2784              :                                               container, mask);
    2785              :         }
    2786              :       else
    2787              :         {
    2788          902 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;
    2789          902 :           shift_n += shl;
    2790          902 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2791              :                                               container,
    2792              :                                               build_int_cst (sizetype,
    2793          902 :                                                              shl));
    2794              :         }
    2795              : 
    2796         1654 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2797         1654 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2798         1654 :       pattern_stmt
    2799         1654 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2800              :                                RSHIFT_EXPR, masked,
    2801         1654 :                                build_int_cst (sizetype, shift_n));
    2802         1654 :       result = gimple_assign_lhs (pattern_stmt);
    2803              :     }
    2804              : 
    2805         2157 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2806              :     {
    2807         1438 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2808         1438 :       pattern_stmt
    2809         1438 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2810              :                                NOP_EXPR, result);
    2811              :     }
    2812              : 
    2813         2157 :   if (!lhs)
    2814              :     {
    2815          741 :       if (!vectype)
    2816              :         return NULL;
    2817              : 
    2818          603 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2819          603 :       vectype = truth_type_for (vectype);
    2820              : 
    2821              :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2822              :                 same way as vect_recog_gcond_pattern does.  However because
    2823              :                 patterns cannot match the same root twice,  when we handle and
    2824              :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2825              :                 apply anymore.  We should really fix it so that we don't need to
    2826              :                 duplicate transformations like these.  */
    2827          603 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2828          603 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2829          603 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2830          603 :       gimple *new_stmt
    2831          603 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2832              :                                gimple_get_lhs (pattern_stmt),
    2833              :                                fold_convert (container_type, cond_cst));
    2834          603 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2835          603 :       pattern_stmt
    2836          603 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2837          603 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2838              :                              NULL_TREE, NULL_TREE);
    2839              :     }
    2840              : 
    2841         2019 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2842         2019 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2843              : 
    2844         2019 :   return pattern_stmt;
    2845              : }
    2846              : 
    2847              : /* Function vect_recog_bit_insert_pattern
    2848              : 
    2849              :    Try to find the following pattern:
    2850              : 
    2851              :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2852              : 
    2853              :    Input:
    2854              : 
    2855              :    * STMT_VINFO: The stmt we want to replace.
    2856              : 
    2857              :    Output:
    2858              : 
    2859              :    * TYPE_OUT: The vector type of the output of this pattern.
    2860              : 
    2861              :    * Return value: A new stmt that will be used to replace the sequence of
    2862              :    stmts that constitute the pattern. In this case it will be:
    2863              :    value = (container_type) value;          // Make sure
    2864              :    shifted = value << bitpos;                 // Shift value into place
    2865              :    masked = shifted & (mask << bitpos);           // Mask off the non-relevant bits in
    2866              :                                             // the 'to-write value'.
    2867              :    cleared = container & ~(mask << bitpos); // Clearing the bits we want to
    2868              :                                             // write to from the value we want
    2869              :                                             // to write to.
    2870              :    written = cleared | masked;              // Write bits.
    2871              : 
    2872              : 
    2873              :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2874              :    bits corresponding to the real size of the bitfield value we are writing to.
    2875              :    The shifting is always optional depending on whether bitpos != 0.
    2876              : 
    2877              : */
    2878              : 
    2879              : static gimple *
    2880     30529229 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2881              :                                tree *type_out)
    2882              : {
    2883     30529229 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2884     27872035 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2885              :     return NULL;
    2886              : 
    2887          598 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2888          598 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2889          598 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2890              : 
    2891          598 :   tree bf_type = TREE_TYPE (value);
    2892          598 :   tree container_type = TREE_TYPE (container);
    2893              : 
    2894          598 :   if (!INTEGRAL_TYPE_P (container_type)
    2895          598 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2896              :     return NULL;
    2897              : 
    2898          500 :   gimple *pattern_stmt;
    2899              : 
    2900          500 :   vect_unpromoted_value unprom;
    2901          500 :   unprom.set_op (value, vect_internal_def);
    2902          500 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2903              :                               get_vectype_for_scalar_type (vinfo,
    2904              :                                                            container_type));
    2905              : 
    2906          500 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
    2907          500 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2908          500 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2909          500 :   if (BYTES_BIG_ENDIAN)
    2910              :     {
    2911              :       shift_n = prec - shift_n - mask_width;
    2912              :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2913              :     }
    2914              : 
    2915          500 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2916              :     {
    2917            0 :       pattern_stmt =
    2918            0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2919              :                              NOP_EXPR, value);
    2920            0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2921            0 :       value = gimple_get_lhs (pattern_stmt);
    2922              :     }
    2923              : 
    2924              :   /* Shift VALUE into place.  */
    2925          500 :   tree shifted = value;
    2926          500 :   if (shift_n)
    2927              :     {
    2928          249 :       gimple_seq stmts = NULL;
    2929          249 :       shifted
    2930          249 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2931          249 :       if (!gimple_seq_empty_p (stmts))
    2932          112 :         append_pattern_def_seq (vinfo, stmt_info,
    2933              :                                 gimple_seq_first_stmt (stmts));
    2934              :     }
    2935              : 
    2936          500 :   tree mask_t
    2937          500 :     = wide_int_to_tree (container_type,
    2938          500 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2939              : 
    2940              :   /* Clear bits we don't want to write back from SHIFTED.  */
    2941          500 :   gimple_seq stmts = NULL;
    2942          500 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2943              :                               mask_t);
    2944          500 :   if (!gimple_seq_empty_p (stmts))
    2945              :     {
    2946          110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2947          110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2948              :     }
    2949              : 
    2950              :   /* Mask off the bits in the container that we are to write to.  */
    2951          500 :   mask_t = wide_int_to_tree (container_type,
    2952          500 :                              wi::shifted_mask (shift_n, mask_width, true, prec));
    2953          500 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2954          500 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2955          500 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2956              : 
    2957              :   /* Write MASKED into CLEARED.  */
    2958          500 :   pattern_stmt
    2959          500 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2960              :                            BIT_IOR_EXPR, cleared, masked);
    2961              : 
    2962          500 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2963          500 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2964              : 
    2965          500 :   return pattern_stmt;
    2966              : }
    2967              : 
    2968              : 
    2969              : /* Recognize cases in which an operation is performed in one type WTYPE
    2970              :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2971              :    if we have:
    2972              : 
    2973              :      ATYPE a;  // narrower than NTYPE
    2974              :      BTYPE b;  // narrower than NTYPE
    2975              :      WTYPE aw = (WTYPE) a;
    2976              :      WTYPE bw = (WTYPE) b;
    2977              :      WTYPE res = aw + bw;  // only uses of aw and bw
    2978              : 
    2979              :    then it would be more efficient to do:
    2980              : 
    2981              :      NTYPE an = (NTYPE) a;
    2982              :      NTYPE bn = (NTYPE) b;
    2983              :      NTYPE resn = an + bn;
    2984              :      WTYPE res = (WTYPE) resn;
    2985              : 
    2986              :    Other situations include things like:
    2987              : 
    2988              :      ATYPE a;  // NTYPE or narrower
    2989              :      WTYPE aw = (WTYPE) a;
    2990              :      WTYPE res = aw + b;
    2991              : 
    2992              :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2993              :    to truncate "b" and do the operation on NTYPE instead:
    2994              : 
    2995              :      NTYPE an = (NTYPE) a;
    2996              :      NTYPE bn = (NTYPE) b;  // truncation
    2997              :      NTYPE resn = an + bn;
    2998              :      WTYPE res = (WTYPE) resn;
    2999              : 
    3000              :    All users of "res" should then use "resn" instead, making the final
    3001              :    statement dead (not marked as relevant).  The final statement is still
    3002              :    needed to maintain the type correctness of the IR.
    3003              : 
    3004              :    vect_determine_precisions has already determined the minimum
    3005              :    precison of the operation and the minimum precision required
    3006              :    by users of the result.  */
    3007              : 
    3008              : static gimple *
    3009     30529669 : vect_recog_over_widening_pattern (vec_info *vinfo,
    3010              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3011              : {
    3012     30529669 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3013     20854538 :   if (!last_stmt)
    3014              :     return NULL;
    3015              : 
    3016              :   /* See whether we have found that this operation can be done on a
    3017              :      narrower type without changing its semantics.  */
    3018     20854538 :   unsigned int new_precision = last_stmt_info->operation_precision;
    3019     20854538 :   if (!new_precision)
    3020              :     return NULL;
    3021              : 
    3022      1621470 :   tree lhs = gimple_assign_lhs (last_stmt);
    3023      1621470 :   tree type = TREE_TYPE (lhs);
    3024      1621470 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3025              : 
    3026              :   /* Punt for reductions where we don't handle the type conversions.  */
    3027      1621470 :   if (vect_is_reduction (last_stmt_info))
    3028              :     return NULL;
    3029              : 
    3030              :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    3031              :      operands are interesting.  */
    3032      1613121 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    3033              : 
    3034              :   /* Check the operands.  */
    3035      1613121 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    3036      1613121 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    3037      1613121 :   unprom.quick_grow_cleared (nops);
    3038      1613121 :   unsigned int min_precision = 0;
    3039      1613121 :   bool single_use_p = false;
    3040      4820062 :   for (unsigned int i = 0; i < nops; ++i)
    3041              :     {
    3042      3208475 :       tree op = gimple_op (last_stmt, first_op + i);
    3043      3208475 :       if (TREE_CODE (op) == INTEGER_CST)
    3044      1468032 :         unprom[i].set_op (op, vect_constant_def);
    3045      1740443 :       else if (TREE_CODE (op) == SSA_NAME)
    3046              :         {
    3047      1740443 :           bool op_single_use_p = true;
    3048      1740443 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3049              :                                                      &op_single_use_p))
    3050         1534 :             return NULL;
    3051              :           /* If:
    3052              : 
    3053              :              (1) N bits of the result are needed;
    3054              :              (2) all inputs are widened from M<N bits; and
    3055              :              (3) one operand OP is a single-use SSA name
    3056              : 
    3057              :              we can shift the M->N widening from OP to the output
    3058              :              without changing the number or type of extensions involved.
    3059              :              This then reduces the number of copies of STMT_INFO.
    3060              : 
    3061              :              If instead of (3) more than one operand is a single-use SSA name,
    3062              :              shifting the extension to the output is even more of a win.
    3063              : 
    3064              :              If instead:
    3065              : 
    3066              :              (1) N bits of the result are needed;
    3067              :              (2) one operand OP2 is widened from M2<N bits;
    3068              :              (3) another operand OP1 is widened from M1<M2 bits; and
    3069              :              (4) both OP1 and OP2 are single-use
    3070              : 
    3071              :              the choice is between:
    3072              : 
    3073              :              (a) truncating OP2 to M1, doing the operation on M1,
    3074              :                  and then widening the result to N
    3075              : 
    3076              :              (b) widening OP1 to M2, doing the operation on M2, and then
    3077              :                  widening the result to N
    3078              : 
    3079              :              Both shift the M2->N widening of the inputs to the output.
    3080              :              (a) additionally shifts the M1->M2 widening to the output;
    3081              :              it requires fewer copies of STMT_INFO but requires an extra
    3082              :              M2->M1 truncation.
    3083              : 
    3084              :              Which is better will depend on the complexity and cost of
    3085              :              STMT_INFO, which is hard to predict at this stage.  However,
    3086              :              a clear tie-breaker in favor of (b) is the fact that the
    3087              :              truncation in (a) increases the length of the operation chain.
    3088              : 
    3089              :              If instead of (4) only one of OP1 or OP2 is single-use,
    3090              :              (b) is still a win over doing the operation in N bits:
    3091              :              it still shifts the M2->N widening on the single-use operand
    3092              :              to the output and reduces the number of STMT_INFO copies.
    3093              : 
    3094              :              If neither operand is single-use then operating on fewer than
    3095              :              N bits might lead to more extensions overall.  Whether it does
    3096              :              or not depends on global information about the vectorization
    3097              :              region, and whether that's a good trade-off would again
    3098              :              depend on the complexity and cost of the statements involved,
    3099              :              as well as things like register pressure that are not normally
    3100              :              modelled at this stage.  We therefore ignore these cases
    3101              :              and just optimize the clear single-use wins above.
    3102              : 
    3103              :              Thus we take the maximum precision of the unpromoted operands
    3104              :              and record whether any operand is single-use.  */
    3105      1738909 :           if (unprom[i].dt == vect_internal_def)
    3106              :             {
    3107      1001911 :               min_precision = MAX (min_precision,
    3108              :                                    TYPE_PRECISION (unprom[i].type));
    3109      1001911 :               single_use_p |= op_single_use_p;
    3110              :             }
    3111              :         }
    3112              :       else
    3113              :         return NULL;
    3114              :     }
    3115              : 
    3116              :   /* Although the operation could be done in operation_precision, we have
    3117              :      to balance that against introducing extra truncations or extensions.
    3118              :      Calculate the minimum precision that can be handled efficiently.
    3119              : 
    3120              :      The loop above determined that the operation could be handled
    3121              :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3122              :      extension from the inputs to the output without introducing more
    3123              :      instructions, and would reduce the number of instructions required
    3124              :      for STMT_INFO itself.
    3125              : 
    3126              :      vect_determine_precisions has also determined that the result only
    3127              :      needs min_output_precision bits.  Truncating by a factor of N times
    3128              :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3129              :      than min_output_precision, doing the operation in TYPE and truncating
    3130              :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3131              :      In contrast:
    3132              : 
    3133              :      - truncating the input to a unary operation and doing the operation
    3134              :        in the new type requires at most N - 1 + 1 = N instructions per
    3135              :        output vector
    3136              : 
    3137              :      - doing the same for a binary operation requires at most
    3138              :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3139              : 
    3140              :      Both unary and binary operations require fewer instructions than
    3141              :      this if the operands were extended from a suitable truncated form.
    3142              :      Thus there is usually nothing to lose by doing operations in
    3143              :      min_output_precision bits, but there can be something to gain.  */
    3144      1611587 :   if (!single_use_p)
    3145      1279012 :     min_precision = last_stmt_info->min_output_precision;
    3146              :   else
    3147       332575 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3148              : 
    3149              :   /* Apply the minimum efficient precision we just calculated.  */
    3150      1611587 :   if (new_precision < min_precision)
    3151              :     new_precision = min_precision;
    3152      1611587 :   new_precision = vect_element_precision (new_precision);
    3153      1611587 :   if (new_precision >= TYPE_PRECISION (type))
    3154              :     return NULL;
    3155              : 
    3156       142823 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3157              : 
    3158       142823 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3159       142823 :   if (!*type_out)
    3160              :     return NULL;
    3161              : 
    3162              :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3163       127692 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3164       127692 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3165              : 
    3166              :   /* If we're truncating an operation, we need to make sure that we
    3167              :      don't introduce new undefined overflow.  The codes tested here are
    3168              :      a subset of those accepted by vect_truncatable_operation_p.  */
    3169       127692 :   tree op_type = new_type;
    3170       127692 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3171       167304 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3172        27706 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3173              : 
    3174       127692 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3175       127692 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3176       127692 :   if (!new_vectype || !op_vectype)
    3177              :     return NULL;
    3178              : 
    3179              :   /* Verify we can handle the new operation.  For shifts and rotates
    3180              :      apply heuristic of whether we are likely facing vector-vector or
    3181              :      vector-scalar operation.  Since we are eventually expecting that
    3182              :      a later pattern might eventually want to rewrite an unsupported
    3183              :      into a supported case error on that side in case the original
    3184              :      operation was not supported either or this is a binary operation
    3185              :      and the 2nd operand is constant.  */
    3186       127692 :   if (code == RSHIFT_EXPR || code == LSHIFT_EXPR || code == RROTATE_EXPR)
    3187              :     {
    3188        26985 :       if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3189        25932 :           && ((unprom[1].dt != vect_external_def
    3190        25520 :                && unprom[1].dt != vect_constant_def)
    3191        17205 :               || !target_has_vecop_for_code (code, op_vectype, optab_scalar))
    3192        35741 :           && !(!target_has_vecop_for_code (code, *type_out, optab_vector)
    3193         7770 :                && ((unprom[1].dt != vect_external_def
    3194         7770 :                     || unprom[1].dt != vect_constant_def)
    3195              :                    || !target_has_vecop_for_code (code, *type_out,
    3196              :                                                   optab_scalar))))
    3197              :         return NULL;
    3198              :     }
    3199       100707 :   else if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3200       100707 :            && (target_has_vecop_for_code (code, *type_out, optab_vector)
    3201           27 :                && !(nops == 2 && unprom[1].dt == vect_constant_def)))
    3202              :     return NULL;
    3203              : 
    3204       126697 :   if (dump_enabled_p ())
    3205         4331 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3206              :                      type, new_type);
    3207              : 
    3208              :   /* Calculate the rhs operands for an operation on OP_TYPE.  */
    3209       126697 :   tree ops[3] = {};
    3210       127119 :   for (unsigned int i = 1; i < first_op; ++i)
    3211          422 :     ops[i - 1] = gimple_op (last_stmt, i);
    3212       126697 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3213       126697 :                        op_type, &unprom[0], op_vectype);
    3214              : 
    3215              :   /* Use the operation to produce a result of type OP_TYPE.  */
    3216       126697 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3217       126697 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3218              :                                               ops[0], ops[1], ops[2]);
    3219       126697 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3220              : 
    3221       126697 :   if (dump_enabled_p ())
    3222         4331 :     dump_printf_loc (MSG_NOTE, vect_location,
    3223              :                      "created pattern stmt: %G", pattern_stmt);
    3224              : 
    3225              :   /* Convert back to the original signedness, if OP_TYPE is different
    3226              :      from NEW_TYPE.  */
    3227       126697 :   if (op_type != new_type)
    3228        27700 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3229              :                                         pattern_stmt, op_vectype);
    3230              : 
    3231              :   /* Promote the result to the original type.  */
    3232       126697 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3233              :                                       pattern_stmt, new_vectype);
    3234              : 
    3235       126697 :   return pattern_stmt;
    3236      1613121 : }
    3237              : 
    3238              : /* Recognize the following patterns:
    3239              : 
    3240              :      ATYPE a;  // narrower than TYPE
    3241              :      BTYPE b;  // narrower than TYPE
    3242              : 
    3243              :    1) Multiply high with scaling
    3244              :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3245              :      Here, c is bitsize (TYPE) / 2 - 1.
    3246              : 
    3247              :    2) ... or also with rounding
    3248              :      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
    3249              :      Here, d is bitsize (TYPE) / 2 - 2.
    3250              : 
    3251              :    3) Normal multiply high
    3252              :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3253              :      Here, e is bitsize (TYPE) / 2.
    3254              : 
    3255              :    where only the bottom half of res is used.  */
    3256              : 
    3257              : static gimple *
    3258     30649244 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3259              :                           stmt_vec_info last_stmt_info, tree *type_out)
    3260              : {
    3261              :   /* Check for a right shift.  */
    3262     30649244 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3263     20973955 :   if (!last_stmt
    3264     20973955 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3265              :     return NULL;
    3266              : 
    3267              :   /* Check that the shift result is wider than the users of the
    3268              :      result need (i.e. that narrowing would be a natural choice).  */
    3269       351282 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3270       351282 :   unsigned int target_precision
    3271       351282 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3272       351282 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3273       351282 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3274              :     return NULL;
    3275              : 
    3276              :   /* Look through any change in sign on the outer shift input.  */
    3277        44010 :   vect_unpromoted_value unprom_rshift_input;
    3278        44010 :   tree rshift_input = vect_look_through_possible_promotion
    3279        44010 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3280        44010 :   if (!rshift_input
    3281        44010 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3282        43412 :            != TYPE_PRECISION (lhs_type))
    3283              :     return NULL;
    3284              : 
    3285              :   /* Get the definition of the shift input.  */
    3286        40114 :   stmt_vec_info rshift_input_stmt_info
    3287        40114 :     = vect_get_internal_def (vinfo, rshift_input);
    3288        40114 :   if (!rshift_input_stmt_info)
    3289              :     return NULL;
    3290        35388 :   gassign *rshift_input_stmt
    3291     30681123 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3292        32023 :   if (!rshift_input_stmt)
    3293              :     return NULL;
    3294              : 
    3295        32023 :   stmt_vec_info mulh_stmt_info;
    3296        32023 :   tree scale_term;
    3297        32023 :   bool rounding_p = false;
    3298              : 
    3299              :   /* Check for the presence of the rounding term.  */
    3300        39122 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3301              :     {
    3302              :       /* Check that the outer shift was by 1.  */
    3303        18974 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3304         9420 :         return NULL;
    3305              : 
    3306              :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3307         1304 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3308              :         return NULL;
    3309              : 
    3310              :       /* Look through any change in sign on the addition input.  */
    3311          110 :       vect_unpromoted_value unprom_plus_input;
    3312          110 :       tree plus_input = vect_look_through_possible_promotion
    3313          110 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3314          110 :       if (!plus_input
    3315          110 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3316          110 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3317              :         return NULL;
    3318              : 
    3319              :       /* Get the definition of the multiply-high-scale part.  */
    3320          110 :       stmt_vec_info plus_input_stmt_info
    3321          110 :         = vect_get_internal_def (vinfo, plus_input);
    3322          110 :       if (!plus_input_stmt_info)
    3323              :         return NULL;
    3324          110 :       gassign *plus_input_stmt
    3325         9530 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3326          110 :       if (!plus_input_stmt
    3327          110 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3328              :         return NULL;
    3329              : 
    3330              :       /* Look through any change in sign on the scaling input.  */
    3331           67 :       vect_unpromoted_value unprom_scale_input;
    3332           67 :       tree scale_input = vect_look_through_possible_promotion
    3333           67 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3334           67 :       if (!scale_input
    3335           67 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3336           67 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3337              :         return NULL;
    3338              : 
    3339              :       /* Get the definition of the multiply-high part.  */
    3340           67 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3341           67 :       if (!mulh_stmt_info)
    3342              :         return NULL;
    3343              : 
    3344              :       /* Get the scaling term.  */
    3345           67 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3346           67 :       rounding_p = true;
    3347              :     }
    3348              :   else
    3349              :     {
    3350        22536 :       mulh_stmt_info = rshift_input_stmt_info;
    3351        22536 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3352              :     }
    3353              : 
    3354              :   /* Check that the scaling factor is constant.  */
    3355        22603 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3356              :     return NULL;
    3357              : 
    3358              :   /* Check whether the scaling input term can be seen as two widened
    3359              :      inputs multiplied together.  */
    3360        64959 :   vect_unpromoted_value unprom_mult[2];
    3361        21653 :   tree new_type;
    3362        21653 :   unsigned int nops
    3363        21653 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3364              :                             false, 2, unprom_mult, &new_type);
    3365        21653 :   if (nops != 2)
    3366              :     return NULL;
    3367              : 
    3368              :   /* Adjust output precision.  */
    3369         1218 :   if (TYPE_PRECISION (new_type) < target_precision)
    3370            0 :     new_type = build_nonstandard_integer_type
    3371            0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3372              : 
    3373         1218 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3374         1218 :   internal_fn ifn;
    3375              :   /* Check that the scaling factor is expected.  Instead of
    3376              :      target_precision, we should use the one that we actually
    3377              :      use for internal function.  */
    3378         1218 :   if (rounding_p)
    3379              :     {
    3380              :       /* Check pattern 2).  */
    3381          134 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3382          201 :           != TYPE_PRECISION (lhs_type))
    3383              :         return NULL;
    3384              : 
    3385              :       ifn = IFN_MULHRS;
    3386              :     }
    3387              :   else
    3388              :     {
    3389              :       /* Check for pattern 1).  */
    3390         2302 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3391         3453 :           == TYPE_PRECISION (lhs_type))
    3392              :         ifn = IFN_MULHS;
    3393              :       /* Check for pattern 3).  */
    3394         1117 :       else if (wi::to_widest (scale_term) + mult_precision
    3395         2234 :                == TYPE_PRECISION (lhs_type))
    3396              :         ifn = IFN_MULH;
    3397              :       else
    3398              :         return NULL;
    3399              :     }
    3400              : 
    3401         1155 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3402              : 
    3403              :   /* Check for target support.  */
    3404         1155 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3405         1155 :   if (!new_vectype
    3406         2291 :       || !direct_internal_fn_supported_p
    3407         1136 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3408         1011 :     return NULL;
    3409              : 
    3410              :   /* The IR requires a valid vector type for the cast result, even though
    3411              :      it's likely to be discarded.  */
    3412          144 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3413          144 :   if (!*type_out)
    3414              :     return NULL;
    3415              : 
    3416              :   /* Generate the IFN_MULHRS call.  */
    3417          144 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3418          144 :   tree new_ops[2];
    3419          144 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3420              :                        unprom_mult, new_vectype);
    3421          144 :   gcall *mulhrs_stmt
    3422          144 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3423          144 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3424          144 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3425              : 
    3426          144 :   if (dump_enabled_p ())
    3427            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3428              :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3429              : 
    3430          144 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3431          144 :                               mulhrs_stmt, new_vectype);
    3432              : }
    3433              : 
    3434              : /* Recognize the patterns:
    3435              : 
    3436              :             ATYPE a;  // narrower than TYPE
    3437              :             BTYPE b;  // narrower than TYPE
    3438              :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3439              :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3440              : 
    3441              :    where only the bottom half of avg is used.  Try to transform them into:
    3442              : 
    3443              :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3444              :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3445              : 
    3446              :   followed by:
    3447              : 
    3448              :             TYPE avg = (TYPE) avg';
    3449              : 
    3450              :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3451              :   of avg is used, all or part of the cast of avg' should become redundant.
    3452              : 
    3453              :   If there is no target support available, generate code to distribute rshift
    3454              :   over plus and add a carry.  */
    3455              : 
    3456              : static gimple *
    3457     30647462 : vect_recog_average_pattern (vec_info *vinfo,
    3458              :                             stmt_vec_info last_stmt_info, tree *type_out)
    3459              : {
    3460              :   /* Check for a shift right by one bit.  */
    3461     30647462 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3462     20972331 :   if (!last_stmt
    3463     20972331 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3464       351188 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3465     30592787 :     return NULL;
    3466              : 
    3467              :   /* Check that the shift result is wider than the users of the
    3468              :      result need (i.e. that narrowing would be a natural choice).  */
    3469        54675 :   tree lhs = gimple_assign_lhs (last_stmt);
    3470        54675 :   tree type = TREE_TYPE (lhs);
    3471        54675 :   unsigned int target_precision
    3472        54675 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3473        54675 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3474              :     return NULL;
    3475              : 
    3476              :   /* Look through any change in sign on the shift input.  */
    3477         2186 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3478         2186 :   vect_unpromoted_value unprom_plus;
    3479         2186 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3480              :                                                      &unprom_plus);
    3481         2186 :   if (!rshift_rhs
    3482         2186 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3483              :     return NULL;
    3484              : 
    3485              :   /* Get the definition of the shift input.  */
    3486         2184 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3487         2184 :   if (!plus_stmt_info)
    3488              :     return NULL;
    3489              : 
    3490              :   /* Check whether the shift input can be seen as a tree of additions on
    3491              :      2 or 3 widened inputs.
    3492              : 
    3493              :      Note that the pattern should be a win even if the result of one or
    3494              :      more additions is reused elsewhere: if the pattern matches, we'd be
    3495              :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3496         8664 :   internal_fn ifn = IFN_AVG_FLOOR;
    3497         8664 :   vect_unpromoted_value unprom[3];
    3498         2166 :   tree new_type;
    3499         2166 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3500         2166 :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3501              :                                             unprom, &new_type);
    3502         2166 :   if (nops == 0)
    3503              :     return NULL;
    3504          907 :   if (nops == 3)
    3505              :     {
    3506              :       /* Check that one operand is 1.  */
    3507              :       unsigned int i;
    3508          987 :       for (i = 0; i < 3; ++i)
    3509          933 :         if (integer_onep (unprom[i].op))
    3510              :           break;
    3511          311 :       if (i == 3)
    3512              :         return NULL;
    3513              :       /* Throw away the 1 operand and keep the other two.  */
    3514          257 :       if (i < 2)
    3515            0 :         unprom[i] = unprom[2];
    3516              :       ifn = IFN_AVG_CEIL;
    3517              :     }
    3518              : 
    3519          853 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3520              : 
    3521              :   /* We know that:
    3522              : 
    3523              :      (a) the operation can be viewed as:
    3524              : 
    3525              :            TYPE widened0 = (TYPE) UNPROM[0];
    3526              :            TYPE widened1 = (TYPE) UNPROM[1];
    3527              :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3528              :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3529              : 
    3530              :      (b) the first two statements are equivalent to:
    3531              : 
    3532              :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3533              :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3534              : 
    3535              :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3536              :          where sensible;
    3537              : 
    3538              :      (d) all the operations can be performed correctly at twice the width of
    3539              :          NEW_TYPE, due to the nature of the average operation; and
    3540              : 
    3541              :      (e) users of the result of the right shift need only TARGET_PRECISION
    3542              :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3543              :          precision.
    3544              : 
    3545              :      Under these circumstances, the only situation in which NEW_TYPE
    3546              :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3547              :      and an addition result are all used more than once.  Thus we can
    3548              :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3549              :      as "free", whereas widening the result of the average instruction
    3550              :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3551              :      therefore better not to go narrower than TARGET_PRECISION.  */
    3552          853 :   if (TYPE_PRECISION (new_type) < target_precision)
    3553            0 :     new_type = build_nonstandard_integer_type (target_precision,
    3554            0 :                                                TYPE_UNSIGNED (new_type));
    3555              : 
    3556              :   /* Check for target support.  */
    3557          853 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3558          853 :   if (!new_vectype)
    3559              :     return NULL;
    3560              : 
    3561          853 :   bool fallback_p = false;
    3562              : 
    3563          853 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3564              :     ;
    3565          695 :   else if (TYPE_UNSIGNED (new_type)
    3566          256 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3567          256 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3568          256 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3569          951 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3570              :     fallback_p = true;
    3571              :   else
    3572          439 :     return NULL;
    3573              : 
    3574              :   /* The IR requires a valid vector type for the cast result, even though
    3575              :      it's likely to be discarded.  */
    3576          414 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3577          414 :   if (!*type_out)
    3578              :     return NULL;
    3579              : 
    3580          410 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3581          410 :   tree new_ops[2];
    3582          410 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3583              :                        unprom, new_vectype);
    3584              : 
    3585          410 :   if (fallback_p)
    3586              :     {
    3587              :       /* As a fallback, generate code for following sequence:
    3588              : 
    3589              :          shifted_op0 = new_ops[0] >> 1;
    3590              :          shifted_op1 = new_ops[1] >> 1;
    3591              :          sum_of_shifted = shifted_op0 + shifted_op1;
    3592              :          unmasked_carry = new_ops[0] and/or new_ops[1];
    3593              :          carry = unmasked_carry & 1;
    3594              :          new_var = sum_of_shifted + carry;
    3595              :       */
    3596              : 
    3597          252 :       tree one_cst = build_one_cst (new_type);
    3598          252 :       gassign *g;
    3599              : 
    3600          252 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3601          252 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3602          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3603              : 
    3604          252 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3605          252 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3606          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3607              : 
    3608          252 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3609          252 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3610              :                                shifted_op0, shifted_op1);
    3611          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3612              : 
    3613          252 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3614          252 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3615          252 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3616          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3617              : 
    3618          252 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3619          252 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3620          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3621              : 
    3622          252 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3623          252 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3624              :     }
    3625              : 
    3626              :   /* Generate the IFN_AVG* call.  */
    3627          158 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3628              :                                                     new_ops[1]);
    3629          158 :   gimple_call_set_lhs (average_stmt, new_var);
    3630          158 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3631              : 
    3632          158 :   if (dump_enabled_p ())
    3633           31 :     dump_printf_loc (MSG_NOTE, vect_location,
    3634              :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3635              : 
    3636          158 :   return vect_convert_output (vinfo, last_stmt_info,
    3637          158 :                               type, average_stmt, new_vectype);
    3638              : }
    3639              : 
    3640              : /* Recognize cases in which the input to a cast is wider than its
    3641              :    output, and the input is fed by a widening operation.  Fold this
    3642              :    by removing the unnecessary intermediate widening.  E.g.:
    3643              : 
    3644              :      unsigned char a;
    3645              :      unsigned int b = (unsigned int) a;
    3646              :      unsigned short c = (unsigned short) b;
    3647              : 
    3648              :    -->
    3649              : 
    3650              :      unsigned short c = (unsigned short) a;
    3651              : 
    3652              :    Although this is rare in input IR, it is an expected side-effect
    3653              :    of the over-widening pattern above.
    3654              : 
    3655              :    This is beneficial also for integer-to-float conversions, if the
    3656              :    widened integer has more bits than the float, and if the unwidened
    3657              :    input doesn't.  */
    3658              : 
    3659              : static gimple *
    3660     30649244 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3661              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3662              : {
    3663              :   /* Check for a cast, including an integer-to-float conversion.  */
    3664     51579645 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3665     20973811 :   if (!last_stmt)
    3666              :     return NULL;
    3667     20973811 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3668     20973811 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3669              :     return NULL;
    3670              : 
    3671              :   /* Make sure that the rhs is a scalar with a natural bitsize.  */
    3672      2964385 :   tree lhs = gimple_assign_lhs (last_stmt);
    3673      2964385 :   if (!lhs)
    3674              :     return NULL;
    3675      2964385 :   tree lhs_type = TREE_TYPE (lhs);
    3676      2964385 :   scalar_mode lhs_mode;
    3677      2944713 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3678      5907362 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3679        25190 :     return NULL;
    3680              : 
    3681              :   /* Check for a narrowing operation (from a vector point of view).  */
    3682      2939195 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3683      2939195 :   tree rhs_type = TREE_TYPE (rhs);
    3684      2939195 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3685      2629256 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3686      8037809 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3687              :     return NULL;
    3688              : 
    3689              :   /* Try to find an unpromoted input.  */
    3690       328652 :   vect_unpromoted_value unprom;
    3691       328652 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3692       328652 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3693              :     return NULL;
    3694              : 
    3695              :   /* If the bits above RHS_TYPE matter, make sure that they're the
    3696              :      same when extending from UNPROM as they are when extending from RHS.  */
    3697        43540 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3698        43540 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3699              :     return NULL;
    3700              : 
    3701              :   /* We can get the same result by casting UNPROM directly, to avoid
    3702              :      the unnecessary widening and narrowing.  */
    3703        43410 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3704              : 
    3705        43410 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3706        43410 :   if (!*type_out)
    3707              :     return NULL;
    3708              : 
    3709        43410 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3710        43410 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3711        43410 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3712              : 
    3713        43410 :   return pattern_stmt;
    3714              : }
    3715              : 
    3716              : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3717              :    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    3718              : 
    3719              : static gimple *
    3720     30580639 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3721              :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3722              : {
    3723     30580639 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3724     30580639 :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3725     30580639 :                                       "vect_recog_widen_shift_pattern");
    3726              : }
    3727              : 
    3728              : /* Detect a rotate pattern wouldn't be otherwise vectorized:
    3729              : 
    3730              :    type a_t, b_t, c_t;
    3731              : 
    3732              :    S0 a_t = b_t r<< c_t;
    3733              : 
    3734              :   Input/Output:
    3735              : 
    3736              :   * STMT_VINFO: The stmt from which the pattern search begins,
    3737              :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3738              :     with a sequence:
    3739              : 
    3740              :    S1 d_t = -c_t;
    3741              :    S2 e_t = d_t & (B - 1);
    3742              :    S3 f_t = b_t << c_t;
    3743              :    S4 g_t = b_t >> e_t;
    3744              :    S0 a_t = f_t | g_t;
    3745              : 
    3746              :     where B is element bitsize of type.
    3747              : 
    3748              :   Output:
    3749              : 
    3750              :   * TYPE_OUT: The type of the output of this pattern.
    3751              : 
    3752              :   * Return value: A new stmt that will be used to replace the rotate
    3753              :     S0 stmt.  */
    3754              : 
    3755              : static gimple *
    3756     30580639 : vect_recog_rotate_pattern (vec_info *vinfo,
    3757              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3758              : {
    3759     30580639 :   gimple *last_stmt = stmt_vinfo->stmt;
    3760     30580639 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3761     30580639 :   gimple *pattern_stmt, *def_stmt;
    3762     30580639 :   enum tree_code rhs_code;
    3763     30580639 :   enum vect_def_type dt;
    3764     30580639 :   optab optab1, optab2;
    3765     30580639 :   edge ext_def = NULL;
    3766     30580639 :   bool bswap16_p = false;
    3767              : 
    3768     30580639 :   if (is_gimple_assign (last_stmt))
    3769              :     {
    3770     20905154 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3771     20905154 :       switch (rhs_code)
    3772              :         {
    3773         7437 :         case LROTATE_EXPR:
    3774         7437 :         case RROTATE_EXPR:
    3775         7437 :           break;
    3776              :         default:
    3777              :           return NULL;
    3778              :         }
    3779              : 
    3780         7437 :       lhs = gimple_assign_lhs (last_stmt);
    3781         7437 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3782         7437 :       type = TREE_TYPE (oprnd0);
    3783         7437 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3784              :     }
    3785      9675485 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3786              :     {
    3787              :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3788              :          The vectorizer has bswap support, but only if the argument isn't
    3789              :          promoted.  */
    3790          170 :       lhs = gimple_call_lhs (last_stmt);
    3791          170 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3792          170 :       type = TREE_TYPE (oprnd0);
    3793          170 :       if (!lhs
    3794          170 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3795          170 :           || TYPE_PRECISION (type) <= 16
    3796            0 :           || TREE_CODE (oprnd0) != SSA_NAME
    3797          170 :           || BITS_PER_UNIT != 8)
    3798          170 :         return NULL;
    3799              : 
    3800            0 :       stmt_vec_info def_stmt_info;
    3801            0 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3802              :         return NULL;
    3803              : 
    3804            0 :       if (dt != vect_internal_def)
    3805              :         return NULL;
    3806              : 
    3807            0 :       if (gimple_assign_cast_p (def_stmt))
    3808              :         {
    3809            0 :           def = gimple_assign_rhs1 (def_stmt);
    3810            0 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3811            0 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3812              :             oprnd0 = def;
    3813              :         }
    3814              : 
    3815            0 :       type = TREE_TYPE (lhs);
    3816            0 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3817            0 :       if (vectype == NULL_TREE)
    3818              :         return NULL;
    3819              : 
    3820            0 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3821              :         {
    3822              :           /* The encoding uses one stepped pattern for each byte in the
    3823              :              16-bit word.  */
    3824            0 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3825            0 :           for (unsigned i = 0; i < 3; ++i)
    3826            0 :             for (unsigned j = 0; j < 2; ++j)
    3827            0 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3828              : 
    3829            0 :           vec_perm_indices indices (elts, 1,
    3830            0 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3831            0 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3832            0 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3833              :             {
    3834              :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3835              :                  undo the argument promotion.  */
    3836            0 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3837              :                 {
    3838            0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3839            0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3840            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3841            0 :                   oprnd0 = def;
    3842              :                 }
    3843              : 
    3844              :               /* Pattern detected.  */
    3845            0 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3846              : 
    3847            0 :               *type_out = vectype;
    3848              : 
    3849              :               /* Pattern supported.  Create a stmt to be used to replace the
    3850              :                  pattern, with the unpromoted argument.  */
    3851            0 :               var = vect_recog_temp_ssa_var (type, NULL);
    3852            0 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3853              :                                                 1, oprnd0);
    3854            0 :               gimple_call_set_lhs (pattern_stmt, var);
    3855            0 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3856              :                                       gimple_call_fntype (last_stmt));
    3857            0 :               return pattern_stmt;
    3858              :             }
    3859            0 :         }
    3860              : 
    3861            0 :       oprnd1 = build_int_cst (integer_type_node, 8);
    3862            0 :       rhs_code = LROTATE_EXPR;
    3863            0 :       bswap16_p = true;
    3864              :     }
    3865              :   else
    3866              :     return NULL;
    3867              : 
    3868         7437 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3869         7317 :       || !INTEGRAL_TYPE_P (type)
    3870        14433 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3871              :     return NULL;
    3872              : 
    3873         6996 :   stmt_vec_info def_stmt_info;
    3874         6996 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3875              :     return NULL;
    3876              : 
    3877         6996 :   if (dt != vect_internal_def
    3878         6754 :       && dt != vect_constant_def
    3879           25 :       && dt != vect_external_def)
    3880              :     return NULL;
    3881              : 
    3882         6990 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3883         6990 :   if (vectype == NULL_TREE)
    3884              :     return NULL;
    3885              : 
    3886              :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3887              :      don't do anything here.  */
    3888         6763 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3889         6763 :   if (optab1
    3890         6763 :       && can_implement_p (optab1, TYPE_MODE (vectype)))
    3891              :     {
    3892          576 :      use_rotate:
    3893          576 :       if (bswap16_p)
    3894              :         {
    3895            0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3896              :             {
    3897            0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3898            0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3899            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3900            0 :               oprnd0 = def;
    3901              :             }
    3902              : 
    3903              :           /* Pattern detected.  */
    3904            0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3905              : 
    3906            0 :           *type_out = vectype;
    3907              : 
    3908              :           /* Pattern supported.  Create a stmt to be used to replace the
    3909              :              pattern.  */
    3910            0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3911            0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3912              :                                               oprnd1);
    3913            0 :           return pattern_stmt;
    3914              :         }
    3915              :       return NULL;
    3916              :     }
    3917              : 
    3918         6727 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3919              :     {
    3920         6643 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3921         6643 :       if (optab2
    3922         6643 :           && can_implement_p (optab2, TYPE_MODE (vectype)))
    3923          540 :         goto use_rotate;
    3924              :     }
    3925              : 
    3926              :   /* We may not use a reduction operand twice.  */
    3927         6187 :   if (vect_is_reduction (stmt_vinfo))
    3928              :     return NULL;
    3929              : 
    3930         6166 :   tree utype = unsigned_type_for (type);
    3931         6166 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3932         6166 :   if (!uvectype)
    3933              :     return NULL;
    3934              : 
    3935              :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3936              :      don't do anything here either.  */
    3937         6166 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3938         6166 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3939         6166 :   if (!optab1
    3940         6166 :       || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3941          806 :       || !optab2
    3942         6972 :       || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3943              :     {
    3944         5360 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3945              :         return NULL;
    3946         5297 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3947         5297 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3948         5297 :       if (!optab1
    3949         5297 :           || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3950         3996 :           || !optab2
    3951         9293 :           || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3952         1301 :         return NULL;
    3953              :     }
    3954              : 
    3955         4802 :   *type_out = vectype;
    3956              : 
    3957         4802 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
    3958              :     {
    3959           52 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3960           52 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3961           52 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3962           52 :       oprnd0 = def;
    3963              :     }
    3964              : 
    3965         4802 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3966           15 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3967              : 
    3968         4802 :   def = NULL_TREE;
    3969         4802 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3970         4802 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3971              :     def = oprnd1;
    3972           28 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3973              :     {
    3974            0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3975            0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3976            0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3977            0 :              == TYPE_PRECISION (type))
    3978              :         def = rhs1;
    3979              :     }
    3980              : 
    3981         4774 :   if (def == NULL_TREE)
    3982              :     {
    3983           28 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3984           28 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3985           28 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3986              :     }
    3987         4802 :   stype = TREE_TYPE (def);
    3988              : 
    3989         4802 :   if (TREE_CODE (def) == INTEGER_CST)
    3990              :     {
    3991         4680 :       if (!tree_fits_uhwi_p (def)
    3992         4680 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3993         9360 :           || integer_zerop (def))
    3994            0 :         return NULL;
    3995         4680 :       def2 = build_int_cst (stype,
    3996         4680 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3997              :     }
    3998              :   else
    3999              :     {
    4000          122 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    4001              : 
    4002          122 :       if (vecstype == NULL_TREE)
    4003              :         return NULL;
    4004          122 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4005          122 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    4006          122 :       if (ext_def)
    4007              :         {
    4008           15 :           basic_block new_bb
    4009           15 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4010           15 :           gcc_assert (!new_bb);
    4011              :         }
    4012              :       else
    4013          107 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4014              : 
    4015          122 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4016          122 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    4017          122 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    4018              :                                       gimple_assign_lhs (def_stmt), mask);
    4019          122 :       if (ext_def)
    4020              :         {
    4021           15 :           basic_block new_bb
    4022           15 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4023           15 :           gcc_assert (!new_bb);
    4024              :         }
    4025              :       else
    4026          107 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4027              :     }
    4028              : 
    4029         4802 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    4030         9509 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    4031              :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    4032              :                                   oprnd0, def);
    4033         4802 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4034              : 
    4035         4802 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    4036         9509 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    4037              :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    4038              :                                   oprnd0, def2);
    4039         4802 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4040              : 
    4041              :   /* Pattern detected.  */
    4042         4802 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    4043              : 
    4044              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4045         4802 :   var = vect_recog_temp_ssa_var (utype, NULL);
    4046         4802 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    4047              : 
    4048         4802 :   if (!useless_type_conversion_p (type, utype))
    4049              :     {
    4050           52 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    4051           52 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    4052           52 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    4053              :     }
    4054              :   return pattern_stmt;
    4055              : }
    4056              : 
    4057              : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    4058              :    vectorized:
    4059              : 
    4060              :    type a_t;
    4061              :    TYPE b_T, res_T;
    4062              : 
    4063              :    S1 a_t = ;
    4064              :    S2 b_T = ;
    4065              :    S3 res_T = b_T op a_t;
    4066              : 
    4067              :   where type 'TYPE' is a type with different size than 'type',
    4068              :   and op is <<, >> or rotate.
    4069              : 
    4070              :   Also detect cases:
    4071              : 
    4072              :    type a_t;
    4073              :    TYPE b_T, c_T, res_T;
    4074              : 
    4075              :    S0 c_T = ;
    4076              :    S1 a_t = (type) c_T;
    4077              :    S2 b_T = ;
    4078              :    S3 res_T = b_T op a_t;
    4079              : 
    4080              :   Input/Output:
    4081              : 
    4082              :   * STMT_VINFO: The stmt from which the pattern search begins,
    4083              :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4084              :     with a shift/rotate which has same type on both operands, in the
    4085              :     second case just b_T op c_T, in the first case with added cast
    4086              :     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
    4087              : 
    4088              :   Output:
    4089              : 
    4090              :   * TYPE_OUT: The type of the output of this pattern.
    4091              : 
    4092              :   * Return value: A new stmt that will be used to replace the shift/rotate
    4093              :     S3 stmt.  */
    4094              : 
    4095              : static gimple *
    4096     30585787 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4097              :                                         stmt_vec_info stmt_vinfo,
    4098              :                                         tree *type_out)
    4099              : {
    4100     30585787 :   gimple *last_stmt = stmt_vinfo->stmt;
    4101     30585787 :   tree oprnd0, oprnd1, lhs, var;
    4102     30585787 :   gimple *pattern_stmt;
    4103     30585787 :   enum tree_code rhs_code;
    4104              : 
    4105     30585787 :   if (!is_gimple_assign (last_stmt))
    4106              :     return NULL;
    4107              : 
    4108     20910302 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4109     20910302 :   switch (rhs_code)
    4110              :     {
    4111       498393 :     case LSHIFT_EXPR:
    4112       498393 :     case RSHIFT_EXPR:
    4113       498393 :     case LROTATE_EXPR:
    4114       498393 :     case RROTATE_EXPR:
    4115       498393 :       break;
    4116              :     default:
    4117              :       return NULL;
    4118              :     }
    4119              : 
    4120       498393 :   lhs = gimple_assign_lhs (last_stmt);
    4121       498393 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4122       498393 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4123       498393 :   if (TREE_CODE (oprnd1) != SSA_NAME
    4124       107621 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4125        48749 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4126        48353 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4127       546746 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4128        48353 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4129       450040 :     return NULL;
    4130              : 
    4131        48353 :   stmt_vec_info def_vinfo = vinfo->lookup_def (oprnd1);
    4132        48353 :   if (!def_vinfo || STMT_VINFO_DEF_TYPE (def_vinfo) == vect_external_def)
    4133              :     return NULL;
    4134              : 
    4135        45432 :   def_vinfo = vect_stmt_to_vectorize (def_vinfo);
    4136         1135 :   gcc_assert (def_vinfo);
    4137              : 
    4138        45432 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4139        45432 :   if (*type_out == NULL_TREE)
    4140              :     return NULL;
    4141              : 
    4142        32957 :   tree def = NULL_TREE;
    4143        32957 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4144        19414 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4145              :     {
    4146         5286 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4147         5286 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4148         5286 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4149         1182 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4150              :         {
    4151         1182 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4152         1182 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4153              :             def = rhs1;
    4154              :           else
    4155              :             {
    4156         1095 :               tree mask
    4157         1095 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4158         1095 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4159         1095 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4160         1095 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4161         1095 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4162         1095 :                                                            TREE_TYPE (rhs1));
    4163         1095 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4164              :             }
    4165              :         }
    4166              :     }
    4167              : 
    4168         1182 :   if (def == NULL_TREE)
    4169              :     {
    4170        31775 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4171        31775 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4172        31775 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4173              :     }
    4174              : 
    4175              :   /* Pattern detected.  */
    4176        32957 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4177              : 
    4178              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4179        32957 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4180        32957 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4181              : 
    4182        32957 :   return pattern_stmt;
    4183              : }
    4184              : 
    4185              : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4186              :    needed by the multiplication-by-immediate synthesis algorithm described by
    4187              :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    4188              :    present.  Return true iff the target supports all the steps.  */
    4189              : 
    4190              : static bool
    4191       296034 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4192              :                                  tree vectype, bool synth_shift_p)
    4193              : {
    4194       296034 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4195              :     return false;
    4196              : 
    4197       296034 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4198       296034 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4199              : 
    4200       296034 :   if (var == negate_variant
    4201       296034 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4202              :     return false;
    4203              : 
    4204              :   /* If we must synthesize shifts with additions make sure that vector
    4205              :      addition is available.  */
    4206       295453 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4207              :     return false;
    4208              : 
    4209       141436 :   for (int i = 1; i < alg->ops; i++)
    4210              :     {
    4211       106342 :       switch (alg->op[i])
    4212              :         {
    4213              :         case alg_shift:
    4214              :           break;
    4215        26331 :         case alg_add_t_m2:
    4216        26331 :         case alg_add_t2_m:
    4217        26331 :         case alg_add_factor:
    4218        26331 :           if (!supports_vplus)
    4219              :             return false;
    4220              :           break;
    4221        16586 :         case alg_sub_t_m2:
    4222        16586 :         case alg_sub_t2_m:
    4223        16586 :         case alg_sub_factor:
    4224        16586 :           if (!supports_vminus)
    4225              :             return false;
    4226              :           break;
    4227              :         case alg_unknown:
    4228              :         case alg_m:
    4229              :         case alg_zero:
    4230              :         case alg_impossible:
    4231              :           return false;
    4232            0 :         default:
    4233            0 :           gcc_unreachable ();
    4234              :         }
    4235              :     }
    4236              : 
    4237              :   return true;
    4238              : }
    4239              : 
    4240              : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
    4241              :    putting the final result in DEST.  Append all statements but the last into
    4242              :    VINFO.  Return the last statement.  */
    4243              : 
    4244              : static gimple *
    4245            0 : synth_lshift_by_additions (vec_info *vinfo,
    4246              :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4247              :                            stmt_vec_info stmt_info)
    4248              : {
    4249            0 :   HOST_WIDE_INT i;
    4250            0 :   tree itype = TREE_TYPE (op);
    4251            0 :   tree prev_res = op;
    4252            0 :   gcc_assert (amnt >= 0);
    4253            0 :   for (i = 0; i < amnt; i++)
    4254              :     {
    4255            0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4256              :                       : dest;
    4257            0 :       gimple *stmt
    4258            0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4259            0 :       prev_res = tmp_var;
    4260            0 :       if (i < amnt - 1)
    4261            0 :         append_pattern_def_seq (vinfo, stmt_info, stmt);
    4262              :       else
    4263            0 :         return stmt;
    4264              :     }
    4265            0 :   gcc_unreachable ();
    4266              :   return NULL;
    4267              : }
    4268              : 
    4269              : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    4270              :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    4271              :    the process if necessary.  Append the resulting assignment statements
    4272              :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    4273              :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    4274              :    left shifts using additions.  */
    4275              : 
    4276              : static tree
    4277        42816 : apply_binop_and_append_stmt (vec_info *vinfo,
    4278              :                              tree_code code, tree op1, tree op2,
    4279              :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    4280              : {
    4281        42816 :   if (integer_zerop (op2)
    4282        42816 :       && (code == LSHIFT_EXPR
    4283        37092 :           || code == PLUS_EXPR))
    4284              :     {
    4285        37092 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4286              :       return op1;
    4287              :     }
    4288              : 
    4289         5724 :   gimple *stmt;
    4290         5724 :   tree itype = TREE_TYPE (op1);
    4291         5724 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4292              : 
    4293         5724 :   if (code == LSHIFT_EXPR
    4294         5724 :       && synth_shift_p)
    4295              :     {
    4296            0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4297            0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
    4298            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4299            0 :       return tmp_var;
    4300              :     }
    4301              : 
    4302         5724 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4303         5724 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4304         5724 :   return tmp_var;
    4305              : }
    4306              : 
    4307              : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4308              :    and simple arithmetic operations to be vectorized.  Record the statements
    4309              :    produced in STMT_VINFO and return the last statement in the sequence or
    4310              :    NULL if it's not possible to synthesize such a multiplication.
    4311              :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4312              :    works on tree-ssa form.  */
    4313              : 
    4314              : static gimple *
    4315       298738 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4316              :                              stmt_vec_info stmt_vinfo)
    4317              : {
    4318       298738 :   tree itype = TREE_TYPE (op);
    4319       298738 :   machine_mode mode = TYPE_MODE (itype);
    4320       298738 :   struct algorithm alg;
    4321       298738 :   mult_variant variant;
    4322       298738 :   if (!tree_fits_shwi_p (val))
    4323              :     return NULL;
    4324              : 
    4325              :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4326              :      signed overflow where the original operation didn't.  Perform the
    4327              :      operations on an unsigned type and cast back to avoid this.
    4328              :      In the future we may want to relax this for synthesis algorithms
    4329              :      that we can prove do not cause unexpected overflow.  */
    4330       296049 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4331              : 
    4332        58168 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4333       296049 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4334       296049 :   if (!vectype)
    4335              :     return NULL;
    4336              : 
    4337              :   /* Targets that don't support vector shifts but support vector additions
    4338              :      can synthesize shifts that way.  */
    4339       296049 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4340              : 
    4341       296049 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4342              :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4343              :      The vectorizer's benefit analysis will decide whether it's beneficial
    4344              :      to do this.  */
    4345       592098 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4346       296049 :                                        ? TYPE_MODE (vectype) : mode,
    4347              :                                        hwval, &alg, &variant, MAX_COST);
    4348       296049 :   if (!possible)
    4349              :     return NULL;
    4350              : 
    4351       296049 :   if (vect_is_reduction (stmt_vinfo))
    4352              :     {
    4353           26 :       int op_uses = alg.op[0] != alg_zero;
    4354           45 :       for (int i = 1; i < alg.ops; i++)
    4355           32 :         switch (alg.op[i])
    4356              :           {
    4357            4 :           case alg_add_t_m2:
    4358            4 :           case alg_sub_t_m2:
    4359            4 :             if (synth_shift_p && alg.log[i])
    4360              :               return NULL;
    4361              :             else
    4362            4 :               op_uses++;
    4363            4 :             break;
    4364            0 :           case alg_add_t2_m:
    4365            0 :           case alg_sub_t2_m:
    4366            0 :             op_uses++;
    4367              :             /* Fallthru.  */
    4368           28 :           case alg_shift:
    4369           28 :             if (synth_shift_p && alg.log[i])
    4370              :               return NULL;
    4371              :             break;
    4372              :           case alg_add_factor:
    4373              :           case alg_sub_factor:
    4374              :             return NULL;
    4375              :           default:
    4376              :             break;
    4377              :           }
    4378           13 :       if (variant == add_variant)
    4379            0 :         op_uses++;
    4380              :       /* When we'll synthesize more than a single use of the reduction
    4381              :          operand the reduction constraints are violated.  Avoid this
    4382              :          situation.  */
    4383           13 :       if (op_uses > 1)
    4384              :         return NULL;
    4385              :     }
    4386              : 
    4387       296034 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4388              :     return NULL;
    4389              : 
    4390        35094 :   tree accumulator;
    4391              : 
    4392              :   /* Clear out the sequence of statements so we can populate it below.  */
    4393        35094 :   gimple *stmt = NULL;
    4394              : 
    4395        35094 :   if (cast_to_unsigned_p)
    4396              :     {
    4397        11927 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4398        11927 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4399        11927 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4400        11927 :       op = tmp_op;
    4401              :     }
    4402              : 
    4403        35094 :   if (alg.op[0] == alg_zero)
    4404          205 :     accumulator = build_int_cst (multtype, 0);
    4405              :   else
    4406              :     accumulator = op;
    4407              : 
    4408        35094 :   bool needs_fixup = (variant == negate_variant)
    4409        35094 :                       || (variant == add_variant);
    4410              : 
    4411       141267 :   for (int i = 1; i < alg.ops; i++)
    4412              :     {
    4413       106173 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4414       106173 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4415       106173 :       tree tmp_var = NULL_TREE;
    4416              : 
    4417       106173 :       switch (alg.op[i])
    4418              :         {
    4419        63357 :         case alg_shift:
    4420        63357 :           if (synth_shift_p)
    4421            0 :             stmt
    4422            0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4423            0 :                                            alg.log[i], stmt_vinfo);
    4424              :           else
    4425        63357 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4426              :                                          shft_log);
    4427              :           break;
    4428        21502 :         case alg_add_t_m2:
    4429        21502 :           tmp_var
    4430        21502 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4431              :                                            stmt_vinfo, synth_shift_p);
    4432        21502 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4433              :                                        tmp_var);
    4434        21502 :           break;
    4435        15783 :         case alg_sub_t_m2:
    4436        15783 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4437              :                                                  shft_log, stmt_vinfo,
    4438              :                                                  synth_shift_p);
    4439              :           /* In some algorithms the first step involves zeroing the
    4440              :              accumulator.  If subtracting from such an accumulator
    4441              :              just emit the negation directly.  */
    4442        15783 :           if (integer_zerop (accumulator))
    4443          205 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4444              :           else
    4445        15578 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4446              :                                         tmp_var);
    4447              :           break;
    4448            0 :         case alg_add_t2_m:
    4449            0 :           tmp_var
    4450            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4451              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4452            0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4453            0 :           break;
    4454            0 :         case alg_sub_t2_m:
    4455            0 :           tmp_var
    4456            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4457              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4458            0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4459            0 :           break;
    4460         4766 :         case alg_add_factor:
    4461         4766 :           tmp_var
    4462         4766 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4463              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4464         4766 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4465              :                                        tmp_var);
    4466         4766 :           break;
    4467          765 :         case alg_sub_factor:
    4468          765 :           tmp_var
    4469          765 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4470              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4471          765 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4472              :                                       accumulator);
    4473          765 :           break;
    4474            0 :         default:
    4475            0 :           gcc_unreachable ();
    4476              :         }
    4477              :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4478              :          but rather return it directly.  */
    4479              : 
    4480       106173 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4481        83298 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4482       106173 :       accumulator = accum_tmp;
    4483              :     }
    4484        35094 :   if (variant == negate_variant)
    4485              :     {
    4486          424 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4487          424 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4488          424 :       accumulator = accum_tmp;
    4489          424 :       if (cast_to_unsigned_p)
    4490          142 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4491              :     }
    4492        34670 :   else if (variant == add_variant)
    4493              :     {
    4494           93 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4495           93 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4496           93 :       accumulator = accum_tmp;
    4497           93 :       if (cast_to_unsigned_p)
    4498           83 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4499              :     }
    4500              :   /* Move back to a signed if needed.  */
    4501        34802 :   if (cast_to_unsigned_p)
    4502              :     {
    4503        11927 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4504        11927 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4505              :     }
    4506              : 
    4507              :   return stmt;
    4508              : }
    4509              : 
    4510              : /* Detect multiplication by constant and convert it into a sequence of
    4511              :    shifts and additions, subtractions, negations.  We reuse the
    4512              :    choose_mult_variant algorithms from expmed.cc
    4513              : 
    4514              :    Input/Output:
    4515              : 
    4516              :    STMT_VINFO: The stmt from which the pattern search begins,
    4517              :    i.e. the mult stmt.
    4518              : 
    4519              :  Output:
    4520              : 
    4521              :   * TYPE_OUT: The type of the output of this pattern.
    4522              : 
    4523              :   * Return value: A new stmt that will be used to replace
    4524              :     the multiplication.  */
    4525              : 
    4526              : static gimple *
    4527     30769739 : vect_recog_mult_pattern (vec_info *vinfo,
    4528              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4529              : {
    4530     30769739 :   gimple *last_stmt = stmt_vinfo->stmt;
    4531     30769739 :   tree oprnd0, oprnd1, vectype, itype;
    4532     30769739 :   gimple *pattern_stmt;
    4533              : 
    4534     30769739 :   if (!is_gimple_assign (last_stmt))
    4535              :     return NULL;
    4536              : 
    4537     21094254 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4538              :     return NULL;
    4539              : 
    4540      1440817 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4541      1440817 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4542      1440817 :   itype = TREE_TYPE (oprnd0);
    4543              : 
    4544      1440817 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4545      1440754 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4546       896869 :       || !INTEGRAL_TYPE_P (itype)
    4547      2337686 :       || !type_has_mode_precision_p (itype))
    4548       544000 :     return NULL;
    4549              : 
    4550       896817 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4551       896817 :   if (vectype == NULL_TREE)
    4552              :     return NULL;
    4553              : 
    4554              :   /* If the target can handle vectorized multiplication natively,
    4555              :      don't attempt to optimize this.  */
    4556       729370 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4557       729370 :   if (mul_optab != unknown_optab
    4558       729370 :       && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    4559              :     return NULL;
    4560              : 
    4561       298738 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4562              :                                               oprnd0, oprnd1, stmt_vinfo);
    4563       298738 :   if (!pattern_stmt)
    4564              :     return NULL;
    4565              : 
    4566              :   /* Pattern detected.  */
    4567        35094 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4568              : 
    4569        35094 :   *type_out = vectype;
    4570              : 
    4571        35094 :   return pattern_stmt;
    4572              : }
    4573              : 
    4574              : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
    4575              : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
    4576              : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4577              : 
    4578              : extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
    4579              : 
    4580              : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
    4581              : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
    4582              : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4583              : 
    4584              : static gimple *
    4585          300 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
    4586              :                                      internal_fn fn, tree *type_out,
    4587              :                                      tree lhs, tree op_0, tree op_1)
    4588              : {
    4589          300 :   tree itype = TREE_TYPE (op_0);
    4590          300 :   tree otype = TREE_TYPE (lhs);
    4591          300 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4592          300 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4593              : 
    4594          300 :   if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4595          300 :     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    4596              :     {
    4597           97 :       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
    4598           97 :       tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
    4599              : 
    4600           97 :       gimple_call_set_lhs (call, in_ssa);
    4601           97 :       gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4602           97 :       gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
    4603              : 
    4604           97 :       *type_out = v_otype;
    4605              : 
    4606           97 :       if (types_compatible_p (itype, otype))
    4607              :         return call;
    4608              :       else
    4609              :         {
    4610            0 :           append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
    4611            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4612              : 
    4613            0 :           return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
    4614              :         }
    4615              :     }
    4616              : 
    4617              :   return NULL;
    4618              : }
    4619              : 
    4620              : /*
    4621              :  * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
    4622              :  *   _7 = _4 + _6;
    4623              :  *   _8 = _4 > _7;
    4624              :  *   _9 = (long unsigned int) _8;
    4625              :  *   _10 = -_9;
    4626              :  *   _12 = _7 | _10;
    4627              :  *
    4628              :  * And then simplied to
    4629              :  *   _12 = .SAT_ADD (_4, _6);
    4630              :  */
    4631              : 
    4632              : static gimple *
    4633     30840155 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4634              :                             tree *type_out)
    4635              : {
    4636     30840155 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4637              : 
    4638     30840155 :   if (!is_gimple_assign (last_stmt))
    4639              :     return NULL;
    4640              : 
    4641     21164670 :   tree ops[2];
    4642     21164670 :   tree lhs = gimple_assign_lhs (last_stmt);
    4643              : 
    4644     21164670 :   if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
    4645     21164670 :       || gimple_signed_integer_sat_add (lhs, ops, NULL))
    4646              :     {
    4647           62 :       if (TREE_CODE (ops[1]) == INTEGER_CST)
    4648           12 :         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
    4649              : 
    4650           62 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4651              :                                                           IFN_SAT_ADD, type_out,
    4652              :                                                           lhs, ops[0], ops[1]);
    4653           62 :       if (stmt)
    4654              :         {
    4655           44 :           vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
    4656           44 :           return stmt;
    4657              :         }
    4658              :     }
    4659              : 
    4660              :   return NULL;
    4661              : }
    4662              : 
    4663              : /*
    4664              :  * Try to transform the truncation for .SAT_SUB pattern,  mostly occurs in
    4665              :  * the benchmark zip.  Aka:
    4666              :  *
    4667              :  *   unsigned int _1;
    4668              :  *   unsigned int _2;
    4669              :  *   unsigned short int _4;
    4670              :  *   _9 = (unsigned short int).SAT_SUB (_1, _2);
    4671              :  *
    4672              :  *   if _1 is known to be in the range of unsigned short int.  For example
    4673              :  *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
    4674              :  *   truncation to:
    4675              :  *
    4676              :  *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
    4677              :  *   _9 = .SAT_SUB (_4, _3);
    4678              :  *
    4679              :  *   Then,  we can better vectorized code and avoid the unnecessary narrowing
    4680              :  *   stmt during vectorization with below stmt(s).
    4681              :  *
    4682              :  *   _3 = .SAT_TRUNC(_2); // SI => HI
    4683              :  *   _9 = .SAT_SUB (_4, _3);
    4684              :  */
    4685              : static void
    4686          238 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
    4687              :                                       stmt_vec_info stmt_vinfo,
    4688              :                                       tree lhs, tree *ops)
    4689              : {
    4690          238 :   tree otype = TREE_TYPE (lhs);
    4691          238 :   tree itype = TREE_TYPE (ops[0]);
    4692          238 :   unsigned itype_prec = TYPE_PRECISION (itype);
    4693          238 :   unsigned otype_prec = TYPE_PRECISION (otype);
    4694              : 
    4695          238 :   if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
    4696          238 :     return;
    4697              : 
    4698            0 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4699            0 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4700            0 :   tree_pair v_pair = tree_pair (v_otype, v_itype);
    4701              : 
    4702            0 :   if (v_otype == NULL_TREE || v_itype == NULL_TREE
    4703            0 :     || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
    4704              :                                         OPTIMIZE_FOR_BOTH))
    4705            0 :     return;
    4706              : 
    4707              :   /* 1. Find the _4 and update ops[0] as above example.  */
    4708            0 :   vect_unpromoted_value unprom;
    4709            0 :   tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
    4710              : 
    4711            0 :   if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
    4712              :     return;
    4713              : 
    4714            0 :   ops[0] = tmp;
    4715              : 
    4716              :   /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
    4717            0 :   tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4718            0 :   gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
    4719              : 
    4720            0 :   gimple_call_set_lhs (call, trunc_lhs_ssa);
    4721            0 :   gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4722            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
    4723              : 
    4724            0 :   ops[1] = trunc_lhs_ssa;
    4725              : }
    4726              : 
    4727              : /*
    4728              :  * Try to detect saturation sub pattern (SAT_ADD), aka below gimple:
    4729              :  * Unsigned:
    4730              :  *   _7 = _1 >= _2;
    4731              :  *   _8 = _1 - _2;
    4732              :  *   _10 = (long unsigned int) _7;
    4733              :  *   _9 = _8 * _10;
    4734              :  *
    4735              :  * And then simplied to
    4736              :  *   _9 = .SAT_SUB (_1, _2);
    4737              :  *
    4738              :  * Signed:
    4739              :  *   x.0_4 = (unsigned char) x_16;
    4740              :  *   y.1_5 = (unsigned char) y_18;
    4741              :  *   _6 = x.0_4 - y.1_5;
    4742              :  *   minus_19 = (int8_t) _6;
    4743              :  *   _7 = x_16 ^ y_18;
    4744              :  *   _8 = x_16 ^ minus_19;
    4745              :  *   _44 = _7 < 0;
    4746              :  *   _23 = x_16 < 0;
    4747              :  *   _24 = (signed char) _23;
    4748              :  *   _58 = (unsigned char) _24;
    4749              :  *   _59 = -_58;
    4750              :  *   _25 = (signed char) _59;
    4751              :  *   _26 = _25 ^ 127;
    4752              :  *   _42 = _8 < 0;
    4753              :  *   _41 = _42 & _44;
    4754              :  *   iftmp.2_11 = _41 ? _26 : minus_19;
    4755              :  *
    4756              :  * And then simplied to
    4757              :  *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
    4758              :  */
    4759              : 
    4760              : static gimple *
    4761     30840111 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4762              :                             tree *type_out)
    4763              : {
    4764     30840111 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4765              : 
    4766     30840111 :   if (!is_gimple_assign (last_stmt))
    4767              :     return NULL;
    4768              : 
    4769     21164626 :   tree ops[2];
    4770     21164626 :   tree lhs = gimple_assign_lhs (last_stmt);
    4771              : 
    4772     21164626 :   if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
    4773     21164626 :       || gimple_signed_integer_sat_sub (lhs, ops, NULL))
    4774              :     {
    4775          238 :       vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
    4776          238 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4777              :                                                           IFN_SAT_SUB, type_out,
    4778              :                                                           lhs, ops[0], ops[1]);
    4779          238 :       if (stmt)
    4780              :         {
    4781           53 :           vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
    4782           53 :           return stmt;
    4783              :         }
    4784              :     }
    4785              : 
    4786              :   return NULL;
    4787              : }
    4788              : 
    4789              : /*
    4790              :  * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
    4791              :  *   overflow_5 = x_4(D) > 4294967295;
    4792              :  *   _1 = (unsigned int) x_4(D);
    4793              :  *   _2 = (unsigned int) overflow_5;
    4794              :  *   _3 = -_2;
    4795              :  *   _6 = _1 | _3;
    4796              :  *
    4797              :  * And then simplied to
    4798              :  *   _6 = .SAT_TRUNC (x_4(D));
    4799              :  */
    4800              : 
    4801              : static gimple *
    4802     30840058 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4803              :                               tree *type_out)
    4804              : {
    4805     30840058 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4806              : 
    4807     30840058 :   if (!is_gimple_assign (last_stmt))
    4808              :     return NULL;
    4809              : 
    4810     21164573 :   tree ops[1];
    4811     21164573 :   tree lhs = gimple_assign_lhs (last_stmt);
    4812     21164573 :   tree otype = TREE_TYPE (lhs);
    4813              : 
    4814     21164573 :   if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
    4815     21164573 :        && type_has_mode_precision_p (otype))
    4816              :     {
    4817           16 :       tree itype = TREE_TYPE (ops[0]);
    4818           16 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4819           16 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4820           16 :       internal_fn fn = IFN_SAT_TRUNC;
    4821              : 
    4822           16 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4823           32 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4824              :                                            OPTIMIZE_FOR_BOTH))
    4825              :         {
    4826            0 :           tree temp = vect_recog_temp_ssa_var (itype, NULL);
    4827            0 :           gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
    4828            0 :           append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
    4829              : 
    4830            0 :           gcall *call = gimple_build_call_internal (fn, 1, temp);
    4831            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4832              : 
    4833            0 :           gimple_call_set_lhs (call, out_ssa);
    4834            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4835            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4836              : 
    4837            0 :           *type_out = v_otype;
    4838              : 
    4839            0 :           return call;
    4840              :         }
    4841              : 
    4842              :     }
    4843              : 
    4844     21164573 :   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    4845     21164250 :        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
    4846     21164573 :       && type_has_mode_precision_p (otype))
    4847              :     {
    4848          311 :       tree itype = TREE_TYPE (ops[0]);
    4849          311 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4850          311 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4851          311 :       internal_fn fn = IFN_SAT_TRUNC;
    4852              : 
    4853          305 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4854          616 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4855              :                                            OPTIMIZE_FOR_BOTH))
    4856              :         {
    4857            0 :           gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
    4858            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4859              : 
    4860            0 :           gimple_call_set_lhs (call, out_ssa);
    4861            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4862            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4863              : 
    4864            0 :           *type_out = v_otype;
    4865              : 
    4866            0 :           return call;
    4867              :         }
    4868              :     }
    4869              : 
    4870              :   return NULL;
    4871              : }
    4872              : 
    4873              : 
    4874              : /* Function add_code_for_floorceilround_divmod
    4875              :    A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
    4876              :    FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
    4877              :    ROUND_DIV_EXPR
    4878              :    The quotient and remainder are needed for implemented these operators.
    4879              :    FLOOR cases
    4880              :    r = x %[fl] y; r = x/[fl] y;
    4881              :    is
    4882              :    r = x % y; if (r && (x ^ y) < 0) r += y;
    4883              :    r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
    4884              :    Produce following sequence
    4885              :    v0 = x^y
    4886              :    v1 = -r
    4887              :    v2 = r | -r
    4888              :    v3 = v0 & v2
    4889              :    v4 = v3 < 0
    4890              :    if (floor_mod)
    4891              :      v5 = v4 ? y : 0
    4892              :      v6 = r + v5
    4893              :    if (floor_div)
    4894              :      v5 = v4 ? 1 : 0
    4895              :      v6 = d - 1
    4896              :    Similar sequences of vector instructions are produces for following cases
    4897              :    CEIL cases
    4898              :    r = x %[cl] y; r = x/[cl] y;
    4899              :    is
    4900              :    r = x % y; if (r && (x ^ y) >= 0) r -= y;
    4901              :    r = x % y; if (r) r -= y; (unsigned)
    4902              :    r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
    4903              :    r = x % y; d = x/y; if (r) d++; (unsigned)
    4904              :    ROUND cases
    4905              :    r = x %[rd] y; r = x/[rd] y;
    4906              :    is
    4907              :    r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
    4908              :    r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
    4909              :    r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
    4910              :    r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
    4911              :    Inputs:
    4912              :      VECTYPE: Vector type of the operands
    4913              :      STMT_VINFO: Statement where pattern begins
    4914              :      RHS_CODE: Should either be FLOOR_MOD_EXPR or FLOOR_DIV_EXPR
    4915              :      Q: The quotient of division
    4916              :      R: Remainder of division
    4917              :      OPRDN0/OPRND1: Actual operands involved
    4918              :      ITYPE: tree type of oprnd0
    4919              :    Output:
    4920              :      NULL if vectorization not possible
    4921              :      Gimple statement based on rhs_code
    4922              : */
    4923              : static gimple *
    4924          431 : add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
    4925              :                                     stmt_vec_info stmt_vinfo,
    4926              :                                     enum tree_code rhs_code, tree q, tree r,
    4927              :                                     tree oprnd0, tree oprnd1, tree itype)
    4928              : {
    4929          431 :   gimple *def_stmt;
    4930          431 :   tree mask_vectype = truth_type_for (vectype);
    4931          431 :   if (!mask_vectype)
    4932              :     return NULL;
    4933          431 :   tree bool_cond;
    4934          431 :   bool unsigned_p = TYPE_UNSIGNED (itype);
    4935              : 
    4936          431 :   switch (rhs_code)
    4937              :     {
    4938          395 :     case FLOOR_MOD_EXPR:
    4939          395 :     case FLOOR_DIV_EXPR:
    4940          395 :     case CEIL_MOD_EXPR:
    4941          395 :     case CEIL_DIV_EXPR:
    4942          395 :       {
    4943          395 :         if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
    4944          363 :             || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
    4945          363 :             || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
    4946          363 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    4947          363 :             || !target_has_vecop_for_code (MINUS_EXPR, vectype)
    4948          363 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    4949          631 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    4950          159 :           return NULL;
    4951          236 :         if (unsigned_p)
    4952              :           {
    4953           18 :             gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4954              : 
    4955           18 :             if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
    4956              :               return NULL;
    4957           18 :             bool is_mod = rhs_code == CEIL_MOD_EXPR;
    4958              :             // r > 0
    4959           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    4960           18 :             def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
    4961              :                                             build_int_cst (itype, 0));
    4962           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    4963              :                                     itype);
    4964              : 
    4965              :             // (r > 0) ? y : 0 (mod)
    4966              :             // (r > 0) ? 1 : 0 (ceil)
    4967           18 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    4968           18 :             def_stmt
    4969           27 :               = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    4970            9 :                                      is_mod ? oprnd1 : build_int_cst (itype, 1),
    4971              :                                      build_int_cst (itype, 0));
    4972           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4973              : 
    4974              :             // r -= (r > 0) ? y : 0 (mod)
    4975              :             // d += (x^y < 0 && r) ? -1 : 0 (ceil)
    4976           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    4977           27 :             return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
    4978           18 :                                         is_mod ? r : q, extr_cond);
    4979              :           }
    4980              :         else
    4981              :           {
    4982          218 :             bool ceil_p
    4983          218 :               = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4984          218 :             if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
    4985              :               return NULL;
    4986              :             // x ^ y
    4987          218 :             tree xort = vect_recog_temp_ssa_var (itype, NULL);
    4988          218 :             def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
    4989          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4990              : 
    4991          218 :             tree cond_reg = xort;
    4992              :             // ~(x ^ y) (ceil)
    4993          218 :             if (ceil_p)
    4994              :               {
    4995           18 :                 cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    4996           18 :                 def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
    4997           18 :                 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4998              :               }
    4999              : 
    5000              :             // -r
    5001          218 :             tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
    5002          218 :             def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
    5003          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5004              : 
    5005              :             // r | -r , sign bit is set if r!=0
    5006          218 :             tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
    5007          218 :             def_stmt
    5008          218 :               = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
    5009          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5010              : 
    5011              :             // (x ^ y) & (r | -r)
    5012              :             // ~(x ^ y) & (r | -r) (ceil)
    5013          218 :             tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
    5014          218 :             def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
    5015              :                                             r_or_negr, cond_reg);
    5016          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5017              : 
    5018              :             // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
    5019          218 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5020          218 :             def_stmt
    5021          218 :               = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
    5022              :                                      build_int_cst (itype, 0));
    5023          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5024              :                                     itype);
    5025              : 
    5026              :             // (x^y < 0 && r) ? y : 0 (mod)
    5027              :             // (x^y < 0 && r) ? -1 : 0 (div)
    5028          218 :             bool is_mod
    5029          218 :               = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
    5030          218 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    5031          258 :             def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    5032              :                                             is_mod ? oprnd1
    5033           40 :                                                    : build_int_cst (itype, -1),
    5034              :                                             build_int_cst (itype, 0));
    5035          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5036              : 
    5037              :             // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
    5038              :             // d += (x^y < 0 && r) ? -1 : 0 (floor div)
    5039              :             // r -= (x ^ y < 0 && r) ? y : 0 (ceil mod)
    5040              :             // d -= (x^y < 0 && r) ? -1 : 0 (ceil div)
    5041          218 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5042          436 :             return gimple_build_assign (result,
    5043          218 :                                         (rhs_code == FLOOR_MOD_EXPR
    5044          218 :                                          || rhs_code == FLOOR_DIV_EXPR)
    5045              :                                           ? PLUS_EXPR
    5046              :                                           : MINUS_EXPR,
    5047          218 :                                         is_mod ? r : q, extr_cond);
    5048              :           }
    5049              :       }
    5050           36 :     case ROUND_MOD_EXPR:
    5051           36 :     case ROUND_DIV_EXPR:
    5052           36 :       {
    5053           36 :         if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
    5054           36 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    5055           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    5056           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
    5057           72 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    5058            0 :           return NULL;
    5059              : 
    5060           36 :         bool is_mod = rhs_code == ROUND_MOD_EXPR;
    5061           36 :         HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5062           36 :         unsigned HOST_WIDE_INT abs_d
    5063              :           = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
    5064           36 :         unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
    5065           36 :         if (!unsigned_p)
    5066              :           {
    5067              :             // check availibility of abs expression for vector
    5068           18 :             if (!target_has_vecop_for_code (ABS_EXPR, vectype))
    5069              :               return NULL;
    5070              :             // abs (r)
    5071           18 :             tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
    5072           18 :             def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
    5073           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5074              : 
    5075              :             // abs (r) > (abs (y-1) >> 1)
    5076           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5077           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
    5078           18 :                                             build_int_cst (itype, mid_d));
    5079           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5080              :                                     itype);
    5081              : 
    5082              :             // x ^ y
    5083           18 :             tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    5084           18 :             def_stmt
    5085           18 :               = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
    5086           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5087              : 
    5088              :             // x ^ y < 0
    5089           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5090           18 :             def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
    5091              :                                             build_int_cst (itype, 0));
    5092           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5093              :                                     itype);
    5094              : 
    5095              :             // x ^ y < 0 ? y : -y (mod)
    5096              :             // x ^ y < 0 ? -1 : 1 (div)
    5097           18 :             tree val1 = vect_recog_temp_ssa_var (itype, NULL);
    5098           18 :             def_stmt
    5099           36 :               = gimple_build_assign (val1, COND_EXPR, bool_cond,
    5100           27 :                                      build_int_cst (itype, is_mod ? d : -1),
    5101           18 :                                      build_int_cst (itype, is_mod ? -d : 1));
    5102           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5103           18 :             int precision = TYPE_PRECISION (itype);
    5104           18 :             wide_int wmask = wi::mask (precision, false, precision);
    5105              : 
    5106              :             // abs (r) > (abs (y-1) >> 1) ? 0xffffffff : 0
    5107           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5108           36 :             def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
    5109           18 :                                             wide_int_to_tree (itype, wmask),
    5110              :                                             build_int_cst (itype, 0));
    5111           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5112              : 
    5113           18 :             tree fval = vect_recog_temp_ssa_var (itype, NULL);
    5114           18 :             def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
    5115           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5116              : 
    5117           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5118           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5119              :                                         fval);
    5120           18 :           }
    5121              :         else
    5122              :           {
    5123              :             // r > (y-1 >> 1)
    5124           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5125           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
    5126           18 :                                             build_int_cst (itype, mid_d));
    5127           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5128              :                                     itype);
    5129              : 
    5130              :             // (r > (y-1)>>1) ? -d : 1
    5131           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5132           18 :             def_stmt
    5133           36 :               = gimple_build_assign (val2, COND_EXPR, round_p,
    5134           18 :                                      build_int_cst (itype, is_mod ? -d : 1),
    5135              :                                      build_int_cst (itype, 0));
    5136           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5137              : 
    5138           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5139           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5140           18 :                                         val2);
    5141              :           }
    5142              :       }
    5143              :     default:
    5144              :       return NULL;
    5145              :     }
    5146              : }
    5147              : 
    5148              : /* Detect a signed division by a constant that wouldn't be
    5149              :    otherwise vectorized:
    5150              : 
    5151              :    type a_t, b_t;
    5152              : 
    5153              :    S1 a_t = b_t / N;
    5154              : 
    5155              :   where type 'type' is an integral type and N is a constant.
    5156              : 
    5157              :   Similarly handle modulo by a constant:
    5158              : 
    5159              :    S4 a_t = b_t % N;
    5160              : 
    5161              :   Input/Output:
    5162              : 
    5163              :   * STMT_VINFO: The stmt from which the pattern search begins,
    5164              :     i.e. the division stmt.  S1 is replaced by if N is a power
    5165              :     of two constant and type is signed:
    5166              :   S3  y_t = b_t < 0 ? N - 1 : 0;
    5167              :   S2  x_t = b_t + y_t;
    5168              :   S1' a_t = x_t >> log2 (N);
    5169              : 
    5170              :     S4 is replaced if N is a power of two constant and
    5171              :     type is signed by (where *_T temporaries have unsigned type):
    5172              :   S9  y_T = b_t < 0 ? -1U : 0U;
    5173              :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    5174              :   S7  z_t = (type) z_T;
    5175              :   S6  w_t = b_t + z_t;
    5176              :   S5  x_t = w_t & (N - 1);
    5177              :   S4' a_t = x_t - z_t;
    5178              : 
    5179              :   Output:
    5180              : 
    5181              :   * TYPE_OUT: The type of the output of this pattern.
    5182              : 
    5183              :   * Return value: A new stmt that will be used to replace the division
    5184              :     S1 or modulo S4 stmt.  */
    5185              : 
    5186              : static gimple *
    5187     30585700 : vect_recog_divmod_pattern (vec_info *vinfo,
    5188              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    5189              : {
    5190     30585700 :   gimple *last_stmt = stmt_vinfo->stmt;
    5191     30585700 :   tree oprnd0, oprnd1, vectype, itype, cond;
    5192     30585700 :   gimple *pattern_stmt = NULL;
    5193     30585700 :   gimple *def_stmt = NULL;
    5194     30585700 :   enum tree_code rhs_code;
    5195     30585700 :   optab optab;
    5196     30585700 :   tree q, cst;
    5197     30585700 :   int prec;
    5198              : 
    5199     30585700 :   if (!is_gimple_assign (last_stmt)
    5200              :       /* The pattern will disrupt the reduction chain with multiple uses.  */
    5201     30585700 :       || vect_is_reduction (stmt_vinfo))
    5202              :     return NULL;
    5203              : 
    5204     20802168 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5205     20802168 :   switch (rhs_code)
    5206              :     {
    5207       268963 :     case TRUNC_DIV_EXPR:
    5208       268963 :     case EXACT_DIV_EXPR:
    5209       268963 :     case TRUNC_MOD_EXPR:
    5210       268963 :     case FLOOR_MOD_EXPR:
    5211       268963 :     case FLOOR_DIV_EXPR:
    5212       268963 :     case CEIL_MOD_EXPR:
    5213       268963 :     case CEIL_DIV_EXPR:
    5214       268963 :     case ROUND_MOD_EXPR:
    5215       268963 :     case ROUND_DIV_EXPR:
    5216       268963 :       break;
    5217              :     default:
    5218              :       return NULL;
    5219              :     }
    5220              : 
    5221       268963 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5222       268963 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5223       268963 :   itype = TREE_TYPE (oprnd0);
    5224       268963 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5225       251326 :       || TREE_CODE (oprnd1) != INTEGER_CST
    5226       157417 :       || TREE_CODE (itype) != INTEGER_TYPE
    5227       426380 :       || !type_has_mode_precision_p (itype))
    5228       111546 :     return NULL;
    5229              : 
    5230       157417 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    5231       157417 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5232       157417 :   if (vectype == NULL_TREE)
    5233              :     return NULL;
    5234              : 
    5235       126061 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    5236              :     {
    5237              :       /* If the target can handle vectorized division or modulo natively,
    5238              :          don't attempt to optimize this, since native division is likely
    5239              :          to give smaller code.  */
    5240         2171 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    5241         2171 :       if (optab != unknown_optab
    5242         2171 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    5243              :         return NULL;
    5244              :     }
    5245              : 
    5246       126061 :   prec = TYPE_PRECISION (itype);
    5247              : 
    5248       252122 :   bool is_flclrd_moddiv_p
    5249       126061 :     = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
    5250              :     || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
    5251       125450 :     || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
    5252       126061 :   if (integer_pow2p (oprnd1))
    5253              :     {
    5254        75597 :       if ((TYPE_UNSIGNED (itype)
    5255           57 :            && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
    5256        75651 :           || tree_int_cst_sgn (oprnd1) != 1)
    5257            3 :         return NULL;
    5258              : 
    5259              :       /* Pattern detected.  */
    5260        75594 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5261              : 
    5262        75594 :       *type_out = vectype;
    5263              : 
    5264              :       /* Check if the target supports this internal function.  */
    5265        75594 :       internal_fn ifn = IFN_DIV_POW2;
    5266        75594 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    5267              :         {
    5268            0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    5269              : 
    5270            0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    5271            0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    5272            0 :           gimple_call_set_lhs (div_stmt, var_div);
    5273            0 :           if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5274              :             {
    5275            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    5276            0 :               tree t1 = vect_recog_temp_ssa_var (itype, NULL);
    5277            0 :               def_stmt
    5278            0 :                 = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
    5279            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5280            0 :               pattern_stmt
    5281            0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5282              :                                        MINUS_EXPR, oprnd0, t1);
    5283            0 :               if (is_flclrd_moddiv_p)
    5284              :                 {
    5285            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5286            0 :                   pattern_stmt
    5287            0 :                     = add_code_for_floorceilround_divmod (vectype, vinfo,
    5288              :                                                           stmt_vinfo, rhs_code,
    5289              :                                                           var_div, t1, oprnd0,
    5290              :                                                           oprnd1, itype);
    5291            0 :                   if (pattern_stmt == NULL)
    5292              :                     return NULL;
    5293              :                 }
    5294              :             }
    5295              :           else
    5296              :             pattern_stmt = div_stmt;
    5297            0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    5298              : 
    5299            0 :           return pattern_stmt;
    5300              :         }
    5301              : 
    5302        75594 :       cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5303        75594 :       def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
    5304              :                                       build_int_cst (itype, 0));
    5305        75594 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
    5306              :                               truth_type_for (vectype), itype);
    5307        75594 :       tree div_result = NULL_TREE;
    5308        75594 :       if (rhs_code == TRUNC_DIV_EXPR
    5309        75594 :           || rhs_code == EXACT_DIV_EXPR
    5310              :           || rhs_code == FLOOR_DIV_EXPR
    5311         2692 :           || rhs_code == CEIL_DIV_EXPR
    5312         2533 :           || rhs_code == ROUND_DIV_EXPR)
    5313              :         {
    5314        73073 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    5315        73073 :           tree shift;
    5316        73073 :           def_stmt
    5317        73073 :             = gimple_build_assign (var, COND_EXPR, cond,
    5318              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5319              :                                                 build_int_cst (itype, 1)),
    5320              :                                    build_int_cst (itype, 0));
    5321        73073 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5322        73073 :           var = vect_recog_temp_ssa_var (itype, NULL);
    5323        73073 :           def_stmt
    5324        73073 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    5325              :                                    gimple_assign_lhs (def_stmt));
    5326        73073 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5327              : 
    5328        73073 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    5329        73073 :           div_result = vect_recog_temp_ssa_var (itype, NULL);
    5330        73073 :           pattern_stmt
    5331        73073 :             = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
    5332              :         }
    5333        75594 :       if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5334              :         {
    5335         2692 :           if (rhs_code == FLOOR_DIV_EXPR
    5336              :               || rhs_code == CEIL_DIV_EXPR
    5337         2692 :               || rhs_code == ROUND_DIV_EXPR)
    5338          171 :             append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5339              : 
    5340         2692 :           tree signmask;
    5341         2692 :           if (compare_tree_int (oprnd1, 2) == 0)
    5342              :             {
    5343         1283 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5344         1283 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    5345              :                                               build_int_cst (itype, 1),
    5346              :                                               build_int_cst (itype, 0));
    5347         1283 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5348              :             }
    5349              :           else
    5350              :             {
    5351         1409 :               tree utype
    5352         1409 :                 = build_nonstandard_integer_type (prec, 1);
    5353         1409 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    5354         1409 :               tree shift
    5355         1409 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    5356         1409 :                                         - tree_log2 (oprnd1));
    5357         1409 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    5358              : 
    5359         1409 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    5360              :                                               build_int_cst (utype, -1),
    5361              :                                               build_int_cst (utype, 0));
    5362         1409 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5363         1409 :               var = vect_recog_temp_ssa_var (utype, NULL);
    5364         1409 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    5365              :                                               gimple_assign_lhs (def_stmt),
    5366              :                                               shift);
    5367         1409 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5368         1409 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5369         1409 :               def_stmt
    5370         1409 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    5371         1409 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5372              :             }
    5373         2692 :           def_stmt
    5374         2692 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5375              :                                    PLUS_EXPR, oprnd0, signmask);
    5376         2692 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5377         2692 :           def_stmt
    5378         2692 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5379              :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    5380              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5381              :                                                 build_int_cst (itype, 1)));
    5382         2692 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5383              : 
    5384         2692 :           tree r = vect_recog_temp_ssa_var (itype, NULL);
    5385         2692 :           pattern_stmt
    5386         2692 :             = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
    5387              :                                    signmask);
    5388         2692 :           if (is_flclrd_moddiv_p)
    5389              :             {
    5390          285 :               append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5391          285 :               pattern_stmt
    5392          285 :                 = add_code_for_floorceilround_divmod (vectype, vinfo,
    5393              :                                                       stmt_vinfo, rhs_code,
    5394              :                                                       div_result, r, oprnd0,
    5395              :                                                       oprnd1, itype);
    5396          285 :               if (pattern_stmt == NULL)
    5397              :                 return NULL;
    5398              :             }
    5399              :         }
    5400              : 
    5401        75435 :       return pattern_stmt;
    5402              :     }
    5403              : 
    5404        50464 :   if ((cst = uniform_integer_cst_p (oprnd1))
    5405        50464 :       && TYPE_UNSIGNED (itype)
    5406              :       && rhs_code == TRUNC_DIV_EXPR
    5407        29467 :       && vectype
    5408        68188 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    5409              :     {
    5410              :       /* We can use the relationship:
    5411              : 
    5412              :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    5413              : 
    5414              :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    5415              :          is therefore a shift right.  When operating in modes that are
    5416              :          multiples of a byte in size, there are two cases:
    5417              : 
    5418              :          (1) N(N+3) is not representable, in which case the question
    5419              :              becomes whether the replacement expression overflows.
    5420              :              It is enough to test that x+N+2 does not overflow,
    5421              :              i.e. that x < MAX-(N+1).
    5422              : 
    5423              :          (2) N(N+3) is representable, in which case it is the (only)
    5424              :              bound that we need to check.
    5425              : 
    5426              :          ??? For now we just handle the case where // (N+1) is a shift
    5427              :          right by half the precision, since some architectures can
    5428              :          optimize the associated addition and shift combinations
    5429              :          into single instructions.  */
    5430              : 
    5431        12041 :       auto wcst = wi::to_wide (cst);
    5432        12041 :       int pow = wi::exact_log2 (wcst + 1);
    5433        12041 :       if (pow == prec / 2)
    5434              :         {
    5435          468 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    5436              : 
    5437          468 :           gimple_ranger ranger;
    5438          468 :           int_range_max r;
    5439              : 
    5440              :           /* Check that no overflow will occur.  If we don't have range
    5441              :              information we can't perform the optimization.  */
    5442              : 
    5443          468 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    5444              :             {
    5445          466 :               wide_int max = r.upper_bound ();
    5446          466 :               wide_int one = wi::shwi (1, prec);
    5447          466 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    5448          466 :               wi::overflow_type ovf;
    5449          466 :               wi::add (max, adder, UNSIGNED, &ovf);
    5450          466 :               if (ovf == wi::OVF_NONE)
    5451              :                 {
    5452          304 :                   *type_out = vectype;
    5453          304 :                   tree tadder = wide_int_to_tree (itype, adder);
    5454          304 :                   tree rshift = wide_int_to_tree (itype, pow);
    5455              : 
    5456          304 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    5457          304 :                   gassign *patt1
    5458          304 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    5459          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5460              : 
    5461          304 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    5462          304 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    5463              :                                                rshift);
    5464          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5465              : 
    5466          304 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    5467          304 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    5468              :                                                oprnd0);
    5469          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5470              : 
    5471          304 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    5472          304 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    5473              :                                                       new_lhs3, rshift);
    5474              : 
    5475          304 :                   return pattern_stmt;
    5476              :                 }
    5477          466 :             }
    5478          468 :         }
    5479              :     }
    5480              : 
    5481        50160 :   if (prec > HOST_BITS_PER_WIDE_INT
    5482        50160 :       || integer_zerop (oprnd1))
    5483          274 :     return NULL;
    5484              : 
    5485        49886 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    5486              :     return NULL;
    5487              : 
    5488        14002 :   if (TYPE_UNSIGNED (itype))
    5489              :     {
    5490         8730 :       unsigned HOST_WIDE_INT mh, ml;
    5491         8730 :       int pre_shift, post_shift;
    5492         8730 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    5493         8730 :                                   & GET_MODE_MASK (itype_mode));
    5494         8730 :       tree t1, t2, t3, t4;
    5495              : 
    5496         8730 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    5497              :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    5498           23 :         return NULL;
    5499              : 
    5500              :       /* Find a suitable multiplier and right shift count instead of
    5501              :          directly dividing by D.  */
    5502         8707 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
    5503              : 
    5504              :       /* If the suggested multiplier is more than PREC bits, we can do better
    5505              :          for even divisors, using an initial right shift.  */
    5506         8707 :       if (mh != 0 && (d & 1) == 0)
    5507              :         {
    5508          246 :           pre_shift = ctz_or_zero (d);
    5509          246 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    5510              :                                   &ml, &post_shift);
    5511          246 :           gcc_assert (!mh);
    5512              :         }
    5513              :       else
    5514              :         pre_shift = 0;
    5515              : 
    5516          634 :       if (mh != 0)
    5517              :         {
    5518          634 :           if (post_shift - 1 >= prec)
    5519              :             return NULL;
    5520              : 
    5521              :           /* t1 = oprnd0 h* ml;
    5522              :              t2 = oprnd0 - t1;
    5523              :              t3 = t2 >> 1;
    5524              :              t4 = t1 + t3;
    5525              :              q = t4 >> (post_shift - 1);  */
    5526          634 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    5527          634 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5528          634 :                                           build_int_cst (itype, ml));
    5529          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5530              : 
    5531          634 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5532          634 :           def_stmt
    5533          634 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    5534          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5535              : 
    5536          634 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5537          634 :           def_stmt
    5538          634 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    5539          634 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5540              : 
    5541          634 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5542          634 :           def_stmt
    5543          634 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    5544              : 
    5545          634 :           if (post_shift != 1)
    5546              :             {
    5547          634 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5548              : 
    5549          634 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5550          634 :               pattern_stmt
    5551          634 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    5552          634 :                                        build_int_cst (itype, post_shift - 1));
    5553              :             }
    5554              :           else
    5555              :             {
    5556              :               q = t4;
    5557              :               pattern_stmt = def_stmt;
    5558              :             }
    5559              :         }
    5560              :       else
    5561              :         {
    5562         8073 :           if (pre_shift >= prec || post_shift >= prec)
    5563              :             return NULL;
    5564              : 
    5565              :           /* t1 = oprnd0 >> pre_shift;
    5566              :              t2 = t1 h* ml;
    5567              :              q = t2 >> post_shift;  */
    5568         8073 :           if (pre_shift)
    5569              :             {
    5570          246 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    5571          246 :               def_stmt
    5572          246 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    5573          246 :                                        build_int_cst (NULL, pre_shift));
    5574          246 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5575              :             }
    5576              :           else
    5577              :             t1 = oprnd0;
    5578              : 
    5579         8073 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5580         8073 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    5581         8073 :                                           build_int_cst (itype, ml));
    5582              : 
    5583         8073 :           if (post_shift)
    5584              :             {
    5585         8063 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5586              : 
    5587         8063 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5588         8063 :               def_stmt
    5589         8063 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    5590         8063 :                                        build_int_cst (itype, post_shift));
    5591              :             }
    5592              :           else
    5593              :             q = t2;
    5594              : 
    5595              :           pattern_stmt = def_stmt;
    5596              :         }
    5597              :     }
    5598              :   else
    5599              :     {
    5600         5272 :       unsigned HOST_WIDE_INT ml;
    5601         5272 :       int post_shift;
    5602         5272 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5603         5272 :       unsigned HOST_WIDE_INT abs_d;
    5604         5272 :       bool add = false;
    5605         5272 :       tree t1, t2, t3, t4;
    5606              : 
    5607              :       /* Give up for -1.  */
    5608         5272 :       if (d == -1)
    5609            0 :         return NULL;
    5610              : 
    5611              :       /* Since d might be INT_MIN, we have to cast to
    5612              :          unsigned HOST_WIDE_INT before negating to avoid
    5613              :          undefined signed overflow.  */
    5614         5272 :       abs_d = (d >= 0
    5615         5272 :                ? (unsigned HOST_WIDE_INT) d
    5616              :                : - (unsigned HOST_WIDE_INT) d);
    5617              : 
    5618              :       /* n rem d = n rem -d */
    5619         5272 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    5620              :         {
    5621            0 :           d = abs_d;
    5622            0 :           oprnd1 = build_int_cst (itype, abs_d);
    5623              :         }
    5624         5272 :       if (HOST_BITS_PER_WIDE_INT >= prec
    5625         5272 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    5626              :         /* This case is not handled correctly below.  */
    5627              :         return NULL;
    5628              : 
    5629         5272 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
    5630         5272 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    5631              :         {
    5632         1589 :           add = true;
    5633         1589 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    5634              :         }
    5635         5272 :       if (post_shift >= prec)
    5636              :         return NULL;
    5637              : 
    5638              :       /* t1 = oprnd0 h* ml;  */
    5639         5272 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5640         5272 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5641         5272 :                                       build_int_cst (itype, ml));
    5642              : 
    5643         5272 :       if (add)
    5644              :         {
    5645              :           /* t2 = t1 + oprnd0;  */
    5646         1589 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5647         1589 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5648         1589 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    5649              :         }
    5650              :       else
    5651              :         t2 = t1;
    5652              : 
    5653         5272 :       if (post_shift)
    5654              :         {
    5655              :           /* t3 = t2 >> post_shift;  */
    5656         4464 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5657         4464 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5658         4464 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    5659         4464 :                                           build_int_cst (itype, post_shift));
    5660              :         }
    5661              :       else
    5662              :         t3 = t2;
    5663              : 
    5664         5272 :       int msb = 1;
    5665         5272 :       int_range_max r;
    5666        10544 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    5667         5272 :       if (!r.varying_p () && !r.undefined_p ())
    5668              :         {
    5669         3058 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    5670              :             msb = 0;
    5671          726 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    5672              :             msb = -1;
    5673              :         }
    5674              : 
    5675         2332 :       if (msb == 0 && d >= 0)
    5676              :         {
    5677              :           /* q = t3;  */
    5678              :           q = t3;
    5679              :           pattern_stmt = def_stmt;
    5680              :         }
    5681              :       else
    5682              :         {
    5683              :           /* t4 = oprnd0 >> (prec - 1);
    5684              :              or if we know from VRP that oprnd0 >= 0
    5685              :              t4 = 0;
    5686              :              or if we know from VRP that oprnd0 < 0
    5687              :              t4 = -1;  */
    5688         3000 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5689         3000 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5690         3000 :           if (msb != 1)
    5691           68 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5692           68 :                                             build_int_cst (itype, msb));
    5693              :           else
    5694         2932 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5695         2932 :                                             build_int_cst (itype, prec - 1));
    5696         3000 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5697              : 
    5698              :           /* q = t3 - t4;  or q = t4 - t3;  */
    5699         3000 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5700         5820 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5701              :                                               d < 0 ? t3 : t4);
    5702              :         }
    5703         5272 :     }
    5704              : 
    5705        13979 :   if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5706              :     {
    5707         6735 :       tree r, t1;
    5708              : 
    5709              :       /* We divided.  Now finish by:
    5710              :          t1 = q * oprnd1;
    5711              :          r = oprnd0 - t1;  */
    5712         6735 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5713              : 
    5714         6735 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5715         6735 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5716         6735 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5717              : 
    5718         6735 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5719         6735 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5720              : 
    5721         6735 :       if (is_flclrd_moddiv_p)
    5722              :         {
    5723          146 :         append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5724          146 :         pattern_stmt
    5725          146 :           = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
    5726              :                                                 rhs_code, q, r, oprnd0, oprnd1,
    5727              :                                                 itype);
    5728          146 :         if (pattern_stmt == NULL)
    5729              :           return NULL;
    5730              :         }
    5731              :     }
    5732              : 
    5733              :   /* Pattern detected.  */
    5734        13979 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5735              : 
    5736        13979 :   *type_out = vectype;
    5737        13979 :   return pattern_stmt;
    5738              : }
    5739              : 
    5740              : /* Detects pattern with a modulo operation (S1) where both arguments
    5741              :    are variables of integral type.
    5742              :    The statement is replaced by division, multiplication, and subtraction.
    5743              :    The last statement (S4) is returned.
    5744              : 
    5745              :    Example:
    5746              :    S1 c_t = a_t % b_t;
    5747              : 
    5748              :    is replaced by
    5749              :    S2 x_t = a_t / b_t;
    5750              :    S3 y_t = x_t * b_t;
    5751              :    S4 z_t = a_t - y_t;  */
    5752              : 
    5753              : static gimple *
    5754     30769739 : vect_recog_mod_var_pattern (vec_info *vinfo,
    5755              :                             stmt_vec_info stmt_vinfo, tree *type_out)
    5756              : {
    5757     30769739 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5758     30769739 :   tree oprnd0, oprnd1, vectype, itype;
    5759     30769739 :   gimple *pattern_stmt, *def_stmt;
    5760     30769739 :   enum tree_code rhs_code;
    5761              : 
    5762     30769739 :   if (!is_gimple_assign (last_stmt) || vect_is_reduction (stmt_vinfo))
    5763              :     return NULL;
    5764              : 
    5765     20986207 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5766     20986207 :   if (rhs_code != TRUNC_MOD_EXPR)
    5767              :     return NULL;
    5768              : 
    5769        68619 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5770        68619 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5771        68619 :   itype = TREE_TYPE (oprnd0);
    5772        68619 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5773        60272 :       || TREE_CODE (oprnd1) != SSA_NAME
    5774        43791 :       || TREE_CODE (itype) != INTEGER_TYPE)
    5775              :     return NULL;
    5776              : 
    5777        43664 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5778              : 
    5779        43664 :   if (!vectype
    5780        35660 :       || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
    5781        35660 :       || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
    5782            0 :       || !target_has_vecop_for_code (MULT_EXPR, vectype)
    5783        43664 :       || !target_has_vecop_for_code (MINUS_EXPR, vectype))
    5784        43664 :     return NULL;
    5785              : 
    5786            0 :   tree q, tmp, r;
    5787            0 :   q = vect_recog_temp_ssa_var (itype, NULL);
    5788            0 :   def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
    5789            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5790              : 
    5791            0 :   tmp = vect_recog_temp_ssa_var (itype, NULL);
    5792            0 :   def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
    5793            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5794              : 
    5795            0 :   r = vect_recog_temp_ssa_var (itype, NULL);
    5796            0 :   pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
    5797              : 
    5798              :   /* Pattern detected.  */
    5799            0 :   *type_out = vectype;
    5800            0 :   vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
    5801              : 
    5802            0 :   return pattern_stmt;
    5803              : }
    5804              : 
    5805              : 
    5806              : /* Return the proper type for converting bool VAR into
    5807              :    an integer value or NULL_TREE if no such type exists.
    5808              :    The type is chosen so that the converted value has the
    5809              :    same number of elements as VAR's vector type.  */
    5810              : 
    5811              : static tree
    5812      4398402 : integer_type_for_mask (tree var, vec_info *vinfo, vect_def_type *dt = nullptr)
    5813              : {
    5814      4398402 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5815              :     return NULL_TREE;
    5816              : 
    5817      2004230 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (var);
    5818      2004230 :   if (dt)
    5819              :     {
    5820       342820 :       if (!def_stmt_info)
    5821         3461 :         *dt = vect_external_def;
    5822              :       else
    5823       339359 :         *dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
    5824              :     }
    5825       342820 :   if (!def_stmt_info
    5826      1915331 :       || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
    5827      3576741 :       || !vect_use_mask_type_p (def_stmt_info))
    5828       776820 :     return NULL_TREE;
    5829              : 
    5830      1227410 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5831              : }
    5832              : 
    5833              : /* Function vect_recog_gcond_pattern
    5834              : 
    5835              :    Try to find pattern like following:
    5836              : 
    5837              :      if (a op b)
    5838              : 
    5839              :    where operator 'op' is not != and convert it to an adjusted boolean pattern
    5840              : 
    5841              :      mask = a op b
    5842              :      if (mask != 0)
    5843              : 
    5844              :    and set the mask type on MASK.
    5845              : 
    5846              :    Input:
    5847              : 
    5848              :    * STMT_VINFO: The stmt at the end from which the pattern
    5849              :                  search begins, i.e. cast of a bool to
    5850              :                  an integer type.
    5851              : 
    5852              :    Output:
    5853              : 
    5854              :    * TYPE_OUT: The type of the output of this pattern.
    5855              : 
    5856              :    * Return value: A new stmt that will be used to replace the pattern.  */
    5857              : 
    5858              : static gimple *
    5859     30840058 : vect_recog_gcond_pattern (vec_info *vinfo,
    5860              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5861              : {
    5862              :   /* Currently we only support this for loop vectorization and when multiple
    5863              :      exits.  */
    5864     30840058 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5865      4469656 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5866              :     return NULL;
    5867              : 
    5868      1631342 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5869      1631342 :   gcond* cond = NULL;
    5870     30861997 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5871              :     return NULL;
    5872              : 
    5873       380285 :   auto lhs = gimple_cond_lhs (cond);
    5874       380285 :   auto rhs = gimple_cond_rhs (cond);
    5875       380285 :   auto code = gimple_cond_code (cond);
    5876              : 
    5877       380285 :   tree scalar_type = TREE_TYPE (lhs);
    5878       380285 :   if (VECTOR_TYPE_P (scalar_type))
    5879              :     return NULL;
    5880              : 
    5881              :   /* If the input is a boolean then try to figure out the precision that the
    5882              :      vector type should use.  We cannot use the scalar precision as this would
    5883              :      later mismatch.  This is similar to what recog_bool does.  */
    5884       380285 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5885              :     {
    5886        10243 :       if (tree stype = integer_type_for_mask (lhs, vinfo))
    5887       380285 :         scalar_type = stype;
    5888              :     }
    5889              : 
    5890       380285 :   tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
    5891       380285 :   if (vectype == NULL_TREE)
    5892              :     return NULL;
    5893              : 
    5894       358346 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5895       358346 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5896       358346 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5897              : 
    5898       358346 :   gimple *pattern_stmt
    5899       358346 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5900       358346 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5901              :                          NULL_TREE, NULL_TREE);
    5902       358346 :   *type_out = vectype;
    5903       358346 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5904       358346 :   return pattern_stmt;
    5905              : }
    5906              : 
    5907              : 
    5908              : /* A helper for vect_recog_mask_conversion_pattern.  Build
    5909              :    conversion of MASK to a type suitable for masking VECTYPE.
    5910              :    Built statement gets required vectype and is appended to
    5911              :    a pattern sequence of STMT_VINFO.
    5912              : 
    5913              :    Return converted mask.  */
    5914              : 
    5915              : static tree
    5916       125468 : build_mask_conversion (vec_info *vinfo,
    5917              :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5918              : {
    5919       125468 :   gimple *stmt;
    5920       125468 :   tree masktype, tmp;
    5921              : 
    5922       125468 :   masktype = truth_type_for (vectype);
    5923       125468 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5924       125468 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5925       125468 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5926       125468 :                           stmt, masktype, TREE_TYPE (vectype));
    5927              : 
    5928       125468 :   return tmp;
    5929              : }
    5930              : 
    5931              : 
    5932              : /* Return MASK if MASK is suitable for masking an operation on vectors
    5933              :    of type VECTYPE, otherwise convert it into such a form and return
    5934              :    the result.  Associate any conversion statements with STMT_INFO's
    5935              :    pattern.  */
    5936              : 
    5937              : static tree
    5938        72854 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    5939              :                                stmt_vec_info stmt_info, vec_info *vinfo)
    5940              : {
    5941        72854 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    5942        72854 :   if (mask_type)
    5943              :     {
    5944        72854 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    5945        72854 :       if (mask_vectype
    5946       145708 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    5947        87017 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    5948        58691 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    5949              :     }
    5950        72854 :   return mask;
    5951              : }
    5952              : 
    5953              : 
    5954              : /* Function vect_recog_bool_pattern
    5955              : 
    5956              :    Try to find pattern like following:
    5957              : 
    5958              :      bool a_b, b_b, c_b, d_b, e_b;
    5959              :      TYPE f_T;
    5960              :    loop:
    5961              :      S1  a_b = x1 CMP1 y1;
    5962              :      S2  b_b = x2 CMP2 y2;
    5963              :      S3  c_b = a_b & b_b;
    5964              :      S4  d_b = x3 CMP3 y3;
    5965              :      S5  e_b = c_b | d_b;
    5966              :      S6  f_T = (TYPE) e_b;
    5967              : 
    5968              :    where type 'TYPE' is an integral type.  Or a similar pattern
    5969              :    ending in
    5970              : 
    5971              :      S6  f_Y = e_b ? r_Y : s_Y;
    5972              : 
    5973              :    as results from if-conversion of a complex condition.
    5974              : 
    5975              :    Input:
    5976              : 
    5977              :    * STMT_VINFO: The stmt at the end from which the pattern
    5978              :                  search begins, i.e. cast of a bool to
    5979              :                  an integer type.
    5980              : 
    5981              :    Output:
    5982              : 
    5983              :    * TYPE_OUT: The type of the output of this pattern.
    5984              : 
    5985              :    * Return value: A new stmt that will be used to replace the pattern.
    5986              : 
    5987              :         Assuming size of TYPE is the same as size of all comparisons
    5988              :         (otherwise some casts would be added where needed), the above
    5989              :         sequence we create related pattern stmts:
    5990              :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5991              :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5992              :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5993              :         S5'  e_T = c_T | d_T;
    5994              :         S6'  f_T = e_T;
    5995              : 
    5996              :         Instead of the above S3' we could emit:
    5997              :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5998              :         S3'  c_T = a_T | b_T;
    5999              :         but the above is more efficient.  */
    6000              : 
    6001              : static gimple *
    6002     30840058 : vect_recog_bool_pattern (vec_info *vinfo,
    6003              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    6004              : {
    6005     30840058 :   gimple *last_stmt = stmt_vinfo->stmt;
    6006     30840058 :   enum tree_code rhs_code;
    6007     30840058 :   tree var, lhs, rhs, vectype;
    6008     30840058 :   gimple *pattern_stmt;
    6009              : 
    6010     30840058 :   if (!is_gimple_assign (last_stmt))
    6011              :     return NULL;
    6012              : 
    6013     21522919 :   var = gimple_assign_rhs1 (last_stmt);
    6014     21522919 :   lhs = gimple_assign_lhs (last_stmt);
    6015     21522919 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6016              : 
    6017     21522919 :   if (rhs_code == VIEW_CONVERT_EXPR)
    6018       189603 :     var = TREE_OPERAND (var, 0);
    6019              : 
    6020     21522919 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    6021              :     return NULL;
    6022              : 
    6023       728663 :   hash_set<gimple *> bool_stmts;
    6024              : 
    6025       728663 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    6026              :       || rhs_code == VIEW_CONVERT_EXPR
    6027              :       || rhs_code == FLOAT_EXPR)
    6028              :     {
    6029       174237 :       if (! (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    6030         2235 :              || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    6031       172536 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6032              :         return NULL;
    6033        81647 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6034              : 
    6035        81647 :       tree type = integer_type_for_mask (var, vinfo);
    6036        81647 :       tree cst0, cst1, tmp;
    6037              : 
    6038        81647 :       if (!type)
    6039              :         return NULL;
    6040              : 
    6041              :       /* We may directly use cond with narrowed type to avoid multiple cond
    6042              :          exprs with following result packing and perform single cond with
    6043              :          packed mask instead.  In case of widening we better make cond first
    6044              :          and then extract results.  */
    6045        42038 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    6046        29294 :         type = TREE_TYPE (lhs);
    6047              : 
    6048        42038 :       cst0 = build_int_cst (type, 0);
    6049        42038 :       cst1 = build_int_cst (type, 1);
    6050        42038 :       tmp = vect_recog_temp_ssa_var (type, NULL);
    6051        42038 :       pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    6052              : 
    6053        42038 :       if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    6054              :         {
    6055        12744 :           tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    6056        12744 :           append_pattern_def_seq (vinfo, stmt_vinfo,
    6057              :                                   pattern_stmt, new_vectype);
    6058              : 
    6059        12744 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6060        12744 :           pattern_stmt
    6061        25174 :             = gimple_build_assign (lhs, (rhs_code == FLOAT_EXPR
    6062              :                                          ? FLOAT_EXPR : CONVERT_EXPR), tmp);
    6063              :         }
    6064              : 
    6065        42038 :       *type_out = vectype;
    6066        42038 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6067              : 
    6068        42038 :       return pattern_stmt;
    6069              :     }
    6070              :   else if (rhs_code == COND_EXPR
    6071       207305 :            && TREE_CODE (var) == SSA_NAME)
    6072              :     {
    6073       207305 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6074       207305 :       if (vectype == NULL_TREE)
    6075              :         return NULL;
    6076              : 
    6077              :       /* Build a scalar type for the boolean result that when
    6078              :          vectorized matches the vector type of the result in
    6079              :          size and number of elements.  */
    6080       193077 :       unsigned prec
    6081       193077 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    6082              :                                TYPE_VECTOR_SUBPARTS (vectype));
    6083              : 
    6084       193077 :       tree type
    6085       386154 :         = build_nonstandard_integer_type (prec,
    6086       193077 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    6087       193077 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    6088              :         return NULL;
    6089              : 
    6090       193077 :       enum vect_def_type dt;
    6091       193077 :       if (integer_type_for_mask (var, vinfo))
    6092              :         return NULL;
    6093        35920 :       else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
    6094        35920 :                && vect_is_simple_use (var, vinfo, &dt)
    6095        35920 :                && (dt == vect_external_def
    6096        35913 :                    || dt == vect_constant_def))
    6097              :         {
    6098              :           /* If the condition is already a boolean then manually convert it to a
    6099              :              mask of the given integer type but don't set a vectype.  */
    6100         1373 :           tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
    6101         1373 :           pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
    6102              :                                               build_all_ones_cst (type),
    6103              :                                               build_zero_cst (type));
    6104         1373 :           append_inv_pattern_def_seq (vinfo, pattern_stmt);
    6105         1373 :           var = lhs_ivar;
    6106              :         }
    6107              : 
    6108        35920 :       tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6109        35920 :       pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
    6110        35920 :                                           build_zero_cst (TREE_TYPE (var)));
    6111              : 
    6112        35920 :       tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
    6113        35920 :       if (!new_vectype)
    6114              :         return NULL;
    6115              : 
    6116        35920 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
    6117        35920 :                               TREE_TYPE (var));
    6118              : 
    6119        35920 :       lhs_var = vect_convert_mask_for_vectype (lhs_var, vectype, stmt_vinfo,
    6120              :                                                vinfo);
    6121              : 
    6122        35920 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6123        35920 :       pattern_stmt
    6124        35920 :         = gimple_build_assign (lhs, COND_EXPR, lhs_var,
    6125              :                                gimple_assign_rhs2 (last_stmt),
    6126              :                                gimple_assign_rhs3 (last_stmt));
    6127        35920 :       *type_out = vectype;
    6128        35920 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6129              : 
    6130        35920 :       return pattern_stmt;
    6131              :     }
    6132       433874 :   else if (rhs_code == BIT_NOT_EXPR
    6133       433874 :            && !vect_use_mask_type_p (stmt_vinfo))
    6134              :     {
    6135              :       /* When we have a bool data inversion rewrite that to an XOR to
    6136              :          cope with the fact that we'll use a wider vector element type.  */
    6137         8851 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6138         8851 :       pattern_stmt
    6139         8851 :         = gimple_build_assign (lhs, BIT_XOR_EXPR, var,
    6140         8851 :                                build_all_ones_cst (TREE_TYPE (var)));
    6141         8851 :       *type_out = NULL_TREE;
    6142         8851 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6143              : 
    6144         8851 :       return pattern_stmt;
    6145              :     }
    6146       425023 :   else if ((rhs_code == BIT_XOR_EXPR
    6147              :             || rhs_code == BIT_AND_EXPR
    6148       425023 :             || rhs_code == BIT_IOR_EXPR)
    6149       332311 :            && TREE_CODE (var) == SSA_NAME)
    6150              :     {
    6151       332311 :       tree rhs2 = gimple_assign_rhs2 (last_stmt);
    6152       332311 :       if (TREE_CODE (rhs2) != SSA_NAME)
    6153              :         return NULL;
    6154       332311 :       tree lhs_type = integer_type_for_mask (lhs, vinfo);
    6155       332311 :       if (!lhs_type)
    6156              :         return NULL;
    6157       171410 :       vectype = get_mask_type_for_scalar_type (vinfo, lhs_type);
    6158       171410 :       if (!vectype)
    6159              :         return NULL;
    6160       171410 :       vect_def_type dt1, dt2;
    6161       171410 :       tree rhs1_type = integer_type_for_mask (var, vinfo, &dt1);
    6162       171410 :       tree rhs2_type = integer_type_for_mask (rhs2, vinfo, &dt2);
    6163       171410 :       if ((rhs1_type || dt1 == vect_external_def)
    6164       157698 :           && (rhs2_type || dt2 == vect_external_def))
    6165              :         return NULL;
    6166              :       /* When one input is a mask and the other is not create a pattern
    6167              :          stmt sequence that creates a mask for the non-mask input and
    6168              :          convert it to one suitable for the output mask used.  */
    6169        32501 :       if (rhs1_type && !rhs2_type)
    6170              :         {
    6171        18789 :           tree rhs1_vectype = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6172        18789 :           if (!rhs1_vectype)
    6173              :             return NULL;
    6174        18789 :           tree rhs2_vectype = get_vectype_for_scalar_type (vinfo,
    6175        18789 :                                                            TREE_TYPE (rhs2));
    6176        18789 :           if (!rhs2_vectype)
    6177              :             return NULL;
    6178        18789 :           tree new_vectype = truth_type_for (rhs2_vectype);
    6179        18789 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6180        18789 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, rhs2,
    6181              :                                               build_zero_cst
    6182        18789 :                                                 (TREE_TYPE (rhs2)));
    6183        18789 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6184        18789 :                                   new_vectype, TREE_TYPE (new_vectype));
    6185        18789 :           rhs2 = vect_convert_mask_for_vectype (tem, rhs1_vectype,
    6186              :                                                 stmt_vinfo, vinfo);
    6187              :         }
    6188        13712 :       else if (!rhs1_type && rhs2_type)
    6189              :         {
    6190        13712 :           tree rhs2_vectype = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6191        13712 :           if (!rhs2_vectype)
    6192              :             return NULL;
    6193        13712 :           tree rhs1_vectype = get_vectype_for_scalar_type (vinfo,
    6194        13712 :                                                            TREE_TYPE (var));
    6195        13712 :           if (!rhs1_vectype)
    6196              :             return NULL;
    6197        13712 :           tree new_vectype = truth_type_for (rhs1_vectype);
    6198        13712 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6199        13712 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, var,
    6200              :                                               build_zero_cst
    6201        13712 :                                                 (TREE_TYPE (var)));
    6202        13712 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6203        13712 :                                   new_vectype, TREE_TYPE (new_vectype));
    6204        13712 :           var = vect_convert_mask_for_vectype (tem, rhs2_vectype,
    6205              :                                                stmt_vinfo, vinfo);
    6206              :         }
    6207        32501 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6208        32501 :       pattern_stmt = gimple_build_assign (lhs, rhs_code, var, rhs2);
    6209        32501 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6210        32501 :       *type_out = vectype;
    6211        32501 :       return pattern_stmt;
    6212              :     }
    6213        92712 :   else if (rhs_code == SSA_NAME
    6214        26117 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    6215              :     {
    6216         7807 :       stmt_vec_info pattern_stmt_info;
    6217         7807 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6218         7807 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    6219            0 :         return NULL;
    6220              : 
    6221         7807 :       tree type = integer_type_for_mask (var, vinfo);
    6222         7807 :       if (!type)
    6223              :         return NULL;
    6224              : 
    6225         4433 :       var = vect_convert_mask_for_vectype (var, vectype, stmt_vinfo, vinfo);
    6226              : 
    6227         4433 :       tree cst0 = build_int_cst (TREE_TYPE (vectype), 0);
    6228         4433 :       tree cst1 = build_int_cst (TREE_TYPE (vectype), 1);
    6229         4433 :       rhs = vect_recog_temp_ssa_var (TREE_TYPE (vectype), NULL);
    6230         4433 :       pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    6231         4433 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype);
    6232              : 
    6233         4433 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    6234         4433 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    6235         4433 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6236         4433 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6237         4433 :       *type_out = vectype;
    6238         4433 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6239              : 
    6240         4433 :       return pattern_stmt;
    6241              :     }
    6242              :   else
    6243              :     return NULL;
    6244       728663 : }
    6245              : 
    6246              : 
    6247              : /* Function vect_recog_mask_conversion_pattern
    6248              : 
    6249              :    Try to find statements which require boolean type
    6250              :    converison.  Additional conversion statements are
    6251              :    added to handle such cases.  For example:
    6252              : 
    6253              :    bool m_1, m_2, m_3;
    6254              :    int i_4, i_5;
    6255              :    double d_6, d_7;
    6256              :    char c_1, c_2, c_3;
    6257              : 
    6258              :    S1   m_1 = i_4 > i_5;
    6259              :    S2   m_2 = d_6 < d_7;
    6260              :    S3   m_3 = m_1 & m_2;
    6261              :    S4   c_1 = m_3 ? c_2 : c_3;
    6262              : 
    6263              :    Will be transformed into:
    6264              : 
    6265              :    S1   m_1 = i_4 > i_5;
    6266              :    S2   m_2 = d_6 < d_7;
    6267              :    S3'' m_2' = (_Bool[bitsize=32])m_2
    6268              :    S3'  m_3' = m_1 & m_2';
    6269              :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    6270              :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    6271              : 
    6272              : static gimple *
    6273     30860773 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    6274              :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    6275              : {
    6276     30860773 :   gimple *last_stmt = stmt_vinfo->stmt;
    6277     30860773 :   enum tree_code rhs_code;
    6278     30860773 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    6279     30860773 :   tree vectype1, vectype2;
    6280     30860773 :   stmt_vec_info pattern_stmt_info;
    6281              : 
    6282              :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    6283              :      conversion.  */
    6284     30860773 :   if (is_gimple_call (last_stmt)
    6285     30860773 :       && gimple_call_internal_p (last_stmt))
    6286              :     {
    6287       110889 :       gcall *pattern_stmt;
    6288              : 
    6289       110889 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    6290       110889 :       int mask_argno = internal_fn_mask_index (ifn);
    6291       110889 :       if (mask_argno < 0)
    6292              :         return NULL;
    6293              : 
    6294        15298 :       bool store_p = internal_store_fn_p (ifn);
    6295        15298 :       bool load_p = internal_store_fn_p (ifn);
    6296        15298 :       if (store_p)
    6297              :         {
    6298         2593 :           int rhs_index = internal_fn_stored_value_index (ifn);
    6299         2593 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    6300         2593 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    6301              :         }
    6302              :       else
    6303              :         {
    6304        12705 :           lhs = gimple_call_lhs (last_stmt);
    6305        12705 :           if (!lhs)
    6306              :             return NULL;
    6307        12705 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6308              :         }
    6309              : 
    6310        15298 :       if (!vectype1)
    6311              :         return NULL;
    6312              : 
    6313        15018 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    6314        15018 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    6315        15018 :       if (mask_arg_type)
    6316              :         {
    6317        13321 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    6318              : 
    6319        13321 :           if (!vectype2
    6320        13321 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6321              :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    6322         8460 :             return NULL;
    6323              :         }
    6324         1697 :       else if (store_p || load_p)
    6325              :         return NULL;
    6326              : 
    6327         6262 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    6328              : 
    6329         6262 :       auto_vec<tree, 8> args;
    6330         6262 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    6331         6262 :       args.safe_grow (nargs, true);
    6332        31310 :       for (unsigned int i = 0; i < nargs; ++i)
    6333        25048 :         args[i] = ((int) i == mask_argno
    6334        25048 :                    ? tmp
    6335        18786 :                    : gimple_call_arg (last_stmt, i));
    6336         6262 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    6337              : 
    6338         6262 :       if (!store_p)
    6339              :         {
    6340         5990 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6341         5990 :           gimple_call_set_lhs (pattern_stmt, lhs);
    6342              :         }
    6343              : 
    6344         5990 :       if (load_p || store_p)
    6345          272 :         gimple_call_set_nothrow (pattern_stmt, true);
    6346              : 
    6347         6262 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6348         6262 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    6349         1952 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6350              : 
    6351         6262 :       *type_out = vectype1;
    6352         6262 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6353              : 
    6354         6262 :       return pattern_stmt;
    6355         6262 :     }
    6356              : 
    6357     30749884 :   if (!is_gimple_assign (last_stmt))
    6358              :     return NULL;
    6359              : 
    6360     21543634 :   gimple *pattern_stmt;
    6361     21543634 :   lhs = gimple_assign_lhs (last_stmt);
    6362     21543634 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    6363     21543634 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6364              : 
    6365              :   /* Check for cond expression requiring mask conversion.  */
    6366     21543634 :   if (rhs_code == COND_EXPR)
    6367              :     {
    6368       188751 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6369              : 
    6370       188751 :       gcc_assert (! COMPARISON_CLASS_P (rhs1));
    6371       188751 :       if (TREE_CODE (rhs1) == SSA_NAME)
    6372              :         {
    6373       188751 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6374       188751 :           if (!rhs1_type)
    6375              :             return NULL;
    6376              :         }
    6377              :       else
    6378              :         return NULL;
    6379              : 
    6380       176665 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6381              : 
    6382       176665 :       if (!vectype1 || !vectype2)
    6383              :         return NULL;
    6384              : 
    6385              :       /* Continue if a conversion is needed.  Also continue if we have
    6386              :          a comparison whose vector type would normally be different from
    6387              :          VECTYPE2 when considered in isolation.  In that case we'll
    6388              :          replace the comparison with an SSA name (so that we can record
    6389              :          its vector type) and behave as though the comparison was an SSA
    6390              :          name from the outset.  */
    6391       174503 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6392              :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6393              :         return NULL;
    6394              : 
    6395        44823 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    6396        89646 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6397        44823 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6398              :       else
    6399              :         tmp = rhs1;
    6400              : 
    6401        44823 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6402        44823 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    6403              :                                           gimple_assign_rhs2 (last_stmt),
    6404              :                                           gimple_assign_rhs3 (last_stmt));
    6405              : 
    6406        44823 :       *type_out = vectype1;
    6407        44823 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6408              : 
    6409        44823 :       return pattern_stmt;
    6410              :     }
    6411              : 
    6412              :   /* Now check for binary boolean operations requiring conversion for
    6413              :      one of operands.  */
    6414     21354883 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6415              :     return NULL;
    6416              : 
    6417      1754189 :   if (rhs_code != BIT_IOR_EXPR
    6418              :       && rhs_code != BIT_XOR_EXPR
    6419      1754189 :       && rhs_code != BIT_AND_EXPR
    6420      1454379 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    6421              :     return NULL;
    6422              : 
    6423      1576937 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    6424              : 
    6425      1576937 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6426      1576937 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    6427              : 
    6428      1576937 :   if (!rhs1_type || !rhs2_type
    6429      1576937 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    6430              :     return NULL;
    6431              : 
    6432        15692 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    6433              :     {
    6434         9864 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6435         9864 :       if (!vectype1)
    6436              :         return NULL;
    6437         9864 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    6438              :     }
    6439              :   else
    6440              :     {
    6441         5828 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6442         5828 :       if (!vectype1)
    6443              :         return NULL;
    6444         5828 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6445              :     }
    6446              : 
    6447        15692 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6448        15692 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    6449              : 
    6450        15692 :   *type_out = vectype1;
    6451        15692 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6452              : 
    6453        15692 :   return pattern_stmt;
    6454              : }
    6455              : 
    6456              : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    6457              :    the boolean condition under which it occurs, otherwise return null.  */
    6458              : 
    6459              : static tree
    6460        99720 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    6461              : {
    6462        99720 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    6463              :     {
    6464        98239 :       gcc_assert (gimple_assign_single_p (def_assign));
    6465              :       return NULL_TREE;
    6466              :     }
    6467              : 
    6468         1481 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    6469              :     {
    6470         1481 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    6471         1481 :       int mask_index = internal_fn_mask_index (ifn);
    6472         1481 :       return gimple_call_arg (def_call, mask_index);
    6473              :     }
    6474              : 
    6475            0 :   gcc_unreachable ();
    6476              : }
    6477              : 
    6478              : /* Return the equivalent of:
    6479              : 
    6480              :      fold_convert (TYPE, VALUE)
    6481              : 
    6482              :    with the expectation that the operation will be vectorized.
    6483              :    If new statements are needed, add them as pattern statements
    6484              :    to STMT_INFO.  */
    6485              : 
    6486              : static tree
    6487            0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    6488              :                                 tree type, tree value, stmt_vec_info stmt_info)
    6489              : {
    6490            0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    6491              :     return value;
    6492              : 
    6493            0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    6494            0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    6495            0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,
    6496              :                           get_vectype_for_scalar_type (vinfo, type));
    6497            0 :   return new_value;
    6498              : }
    6499              : 
    6500              : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    6501              :    internal function.  Return the final statement on success and set
    6502              :    *TYPE_OUT to the vector type being loaded or stored.
    6503              : 
    6504              :    This function only handles gathers and scatters that were recognized
    6505              :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
    6506              : 
    6507              : static gimple *
    6508     30860773 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    6509              :                                    stmt_vec_info stmt_info, tree *type_out)
    6510              : {
    6511              :   /* Currently we only support this for loop vectorization.  */
    6512     35343061 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6513      4482288 :   if (!loop_vinfo)
    6514              :     return NULL;
    6515              : 
    6516              :   /* Make sure that we're looking at a gather load or scatter store.  */
    6517      4482288 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    6518      4482288 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    6519              :     return NULL;
    6520              : 
    6521              :   /* Get the boolean that controls whether the load or store happens.
    6522              :      This is null if the operation is unconditional.  */
    6523        99720 :   tree mask = vect_get_load_store_mask (stmt_info);
    6524              : 
    6525              :   /* DR analysis nailed down the vector type for the access.  */
    6526        99720 :   tree gs_vectype = STMT_VINFO_VECTYPE (stmt_info);
    6527              : 
    6528              :   /* Make sure that the target supports an appropriate internal
    6529              :      function for the gather/scatter operation.  */
    6530        99720 :   gather_scatter_info gs_info;
    6531        99720 :   if (!vect_check_gather_scatter (stmt_info, gs_vectype, loop_vinfo, &gs_info)
    6532        99720 :       || gs_info.ifn == IFN_LAST)
    6533              :     return NULL;
    6534              : 
    6535              :   /* Convert the mask to the right form.  */
    6536            0 :   if (mask)
    6537            0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    6538              :                                           loop_vinfo);
    6539            0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6540            0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6541            0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6542            0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6543            0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
    6544              : 
    6545              :   /* Get the invariant base and non-invariant offset, converting the
    6546              :      latter to the same width as the vector elements.  */
    6547            0 :   tree base = gs_info.base;
    6548            0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6549            0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6550              :                                                 gs_info.offset, stmt_info);
    6551              : 
    6552              :   /* Build the new pattern statement.  */
    6553            0 :   tree scale = size_int (gs_info.scale);
    6554            0 :   gcall *pattern_stmt;
    6555              : 
    6556            0 :   if (DR_IS_READ (dr))
    6557              :     {
    6558            0 :       tree zero = build_zero_cst (gs_info.element_type);
    6559            0 :       if (mask != NULL)
    6560              :         {
    6561            0 :           int elsval = MASK_LOAD_ELSE_ZERO;
    6562              : 
    6563            0 :           tree vec_els
    6564            0 :             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
    6565            0 :           pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
    6566              :                                                      gs_info.alias_ptr,
    6567              :                                                      offset, scale, zero, mask,
    6568              :                                                      vec_els);
    6569              :         }
    6570              :       else
    6571            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
    6572              :                                                    gs_info.alias_ptr,
    6573              :                                                    offset, scale, zero);
    6574            0 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
    6575            0 :       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6576            0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6577              :     }
    6578              :   else
    6579              :     {
    6580            0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6581            0 :       if (mask != NULL)
    6582            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
    6583              :                                                    base, gs_info.alias_ptr,
    6584              :                                                    offset, scale, rhs, mask);
    6585              :       else
    6586            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
    6587              :                                                    base, gs_info.alias_ptr,
    6588              :                                                    offset, scale, rhs);
    6589              :     }
    6590            0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6591              : 
    6592              :   /* Copy across relevant vectorization info and associate DR with the
    6593              :      new pattern statement instead of the original statement.  */
    6594            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6595            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6596              : 
    6597            0 :   *type_out = gs_vectype;
    6598            0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6599              : 
    6600            0 :   return pattern_stmt;
    6601              : }
    6602              : 
    6603              : /* Helper method of vect_recog_cond_store_pattern,  checks to see if COND_ARG
    6604              :    is points to a load statement that reads the same data as that of
    6605              :    STORE_VINFO.  */
    6606              : 
    6607              : static bool
    6608        35664 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
    6609              :                                   stmt_vec_info store_vinfo, tree cond_arg)
    6610              : {
    6611        35664 :   stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
    6612        35664 :   if (!load_stmt_vinfo
    6613        20717 :       || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
    6614        12402 :       || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
    6615        48066 :       || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
    6616              :                           STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    6617        26310 :     return false;
    6618              : 
    6619              :   return true;
    6620              : }
    6621              : 
    6622              : /* Function vect_recog_cond_store_pattern
    6623              : 
    6624              :    Try to find the following pattern:
    6625              : 
    6626              :    x = *_3;
    6627              :    c = a CMP b;
    6628              :    y = c ? t_20 : x;
    6629              :    *_3 = y;
    6630              : 
    6631              :    where the store of _3 happens on a conditional select on a value loaded
    6632              :    from the same location.  In such case we can elide the initial load if
    6633              :    MASK_STORE is supported and instead only conditionally write out the result.
    6634              : 
    6635              :    The pattern produces for the above:
    6636              : 
    6637              :    c = a CMP b;
    6638              :    .MASK_STORE (_3, c, t_20)
    6639              : 
    6640              :    Input:
    6641              : 
    6642              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    6643              :    example, when this function is called with _3 then the search begins.
    6644              : 
    6645              :    Output:
    6646              : 
    6647              :    * TYPE_OUT: The type of the output  of this pattern.
    6648              : 
    6649              :    * Return value: A new stmt that will be used to replace the sequence.  */
    6650              : 
    6651              : static gimple *
    6652     30860773 : vect_recog_cond_store_pattern (vec_info *vinfo,
    6653              :                                stmt_vec_info stmt_vinfo, tree *type_out)
    6654              : {
    6655     30860773 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6656      4482288 :   if (!loop_vinfo)
    6657              :     return NULL;
    6658              : 
    6659      4482288 :   gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
    6660              : 
    6661              :   /* Needs to be a gimple store where we have DR info for.  */
    6662      4482288 :   if (!STMT_VINFO_DATA_REF (stmt_vinfo)
    6663      1072227 :       || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
    6664      4861306 :       || !gimple_store_p (store_stmt))
    6665      4105738 :     return NULL;
    6666              : 
    6667       376550 :   tree st_rhs = gimple_assign_rhs1 (store_stmt);
    6668              : 
    6669       376550 :   if (TREE_CODE (st_rhs) != SSA_NAME)
    6670              :     return NULL;
    6671              : 
    6672       293161 :   auto cond_vinfo = vinfo->lookup_def (st_rhs);
    6673              : 
    6674              :   /* If the condition isn't part of the loop then bool recog wouldn't have seen
    6675              :      it and so this transformation may not be valid.  */
    6676       293161 :   if (!cond_vinfo)
    6677              :     return NULL;
    6678              : 
    6679       275747 :   cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
    6680     31122674 :   gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
    6681       340864 :   if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    6682              :     return NULL;
    6683              : 
    6684              :   /* Check if the else value matches the original loaded one.  */
    6685        18893 :   bool invert = false;
    6686        18893 :   tree cmp_ls = gimple_arg (cond_stmt, 0);
    6687        18893 :   if (TREE_CODE (cmp_ls) != SSA_NAME)
    6688              :     return NULL;
    6689              : 
    6690        18893 :   tree cond_arg1 = gimple_arg (cond_stmt, 1);
    6691        18893 :   tree cond_arg2 = gimple_arg (cond_stmt, 2);
    6692              : 
    6693        18893 :   if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
    6694        18893 :       && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
    6695              :                                                       cond_arg1)))
    6696              :     return NULL;
    6697              : 
    6698         9354 :   vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
    6699              : 
    6700         9354 :   tree scalar_type = TREE_TYPE (st_rhs);
    6701         9354 :   if (VECTOR_TYPE_P (scalar_type))
    6702              :     return NULL;
    6703              : 
    6704         9354 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6705         9354 :   if (vectype == NULL_TREE)
    6706              :     return NULL;
    6707              : 
    6708         9354 :   machine_mode mask_mode;
    6709         9354 :   machine_mode vecmode = TYPE_MODE (vectype);
    6710         1848 :   if (!VECTOR_MODE_P (vecmode)
    6711         9354 :       || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
    6712            0 :       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    6713         9354 :       || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    6714         9354 :     return NULL;
    6715              : 
    6716            0 :   tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
    6717            0 :   if (may_be_nonaddressable_p (base))
    6718              :     return NULL;
    6719              : 
    6720              :   /* We need to use the false parameter of the conditional select.  */
    6721            0 :   tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
    6722            0 :   tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
    6723            0 :   gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
    6724              : 
    6725              :   /* This is a rough estimation to check that there aren't any aliasing stores
    6726              :      in between the load and store.  It's a bit strict, but for now it's good
    6727              :      enough.  */
    6728            0 :   if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    6729              :     return NULL;
    6730              : 
    6731              :   /* If we have to invert the condition, i.e. use the true argument rather than
    6732              :      the false argument, we have to negate the mask.  */
    6733            0 :   if (invert)
    6734              :     {
    6735            0 :       tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6736              : 
    6737              :       /* Invert the mask using ^ 1.  */
    6738            0 :       tree itype = TREE_TYPE (cmp_ls);
    6739            0 :       gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
    6740              :                                            build_int_cst (itype, 1));
    6741              : 
    6742            0 :       tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
    6743            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
    6744            0 :       cmp_ls= var;
    6745              :     }
    6746              : 
    6747            0 :   if (TREE_CODE (base) != MEM_REF)
    6748            0 :    base = build_fold_addr_expr (base);
    6749              : 
    6750            0 :   tree ptr = build_int_cst (reference_alias_ptr_type (base),
    6751            0 :                             get_object_alignment (base));
    6752              : 
    6753              :   /* Convert the mask to the right form.  */
    6754            0 :   tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
    6755              :                                              vinfo);
    6756              : 
    6757            0 :   gcall *call
    6758            0 :     = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
    6759              :                                   cond_store_arg);
    6760            0 :   gimple_set_location (call, gimple_location (store_stmt));
    6761              : 
    6762              :   /* Copy across relevant vectorization info and associate DR with the
    6763              :      new pattern statement instead of the original statement.  */
    6764            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
    6765            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6766              : 
    6767            0 :   *type_out = vectype;
    6768            0 :   return call;
    6769              : }
    6770              : 
    6771              : /* Return true if TYPE is a non-boolean integer type.  These are the types
    6772              :    that we want to consider for narrowing.  */
    6773              : 
    6774              : static bool
    6775     61927427 : vect_narrowable_type_p (tree type)
    6776              : {
    6777     61927427 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6778              : }
    6779              : 
    6780              : /* Return true if the operation given by CODE can be truncated to N bits
    6781              :    when only N bits of the output are needed.  This is only true if bit N+1
    6782              :    of the inputs has no effect on the low N bits of the result.  */
    6783              : 
    6784              : static bool
    6785     15844985 : vect_truncatable_operation_p (tree_code code)
    6786              : {
    6787     15844985 :   switch (code)
    6788              :     {
    6789              :     case NEGATE_EXPR:
    6790              :     case PLUS_EXPR:
    6791              :     case MINUS_EXPR:
    6792              :     case MULT_EXPR:
    6793              :     case BIT_NOT_EXPR:
    6794              :     case BIT_AND_EXPR:
    6795              :     case BIT_IOR_EXPR:
    6796              :     case BIT_XOR_EXPR:
    6797              :     case COND_EXPR:
    6798              :       return true;
    6799              : 
    6800      6024536 :     default:
    6801      6024536 :       return false;
    6802              :     }
    6803              : }
    6804              : 
    6805              : /* Record that STMT_INFO could be changed from operating on TYPE to
    6806              :    operating on a type with the precision and sign given by PRECISION
    6807              :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6808              :    it might not be a whole number of bytes.  */
    6809              : 
    6810              : static void
    6811      2507007 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6812              :                          unsigned int precision, signop sign)
    6813              : {
    6814              :   /* Round the precision up to a whole number of bytes.  */
    6815      2507007 :   precision = vect_element_precision (precision);
    6816      2507007 :   if (precision < TYPE_PRECISION (type)
    6817      2507007 :       && (!stmt_info->operation_precision
    6818        41590 :           || stmt_info->operation_precision > precision))
    6819              :     {
    6820      1629033 :       stmt_info->operation_precision = precision;
    6821      1629033 :       stmt_info->operation_sign = sign;
    6822              :     }
    6823      2507007 : }
    6824              : 
    6825              : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6826              :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6827              :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    6828              : 
    6829              : static void
    6830     11508385 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6831              :                               unsigned int min_input_precision)
    6832              : {
    6833              :   /* This operation in isolation only requires the inputs to have
    6834              :      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
    6835              :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6836              :      as a whole.  E.g. consider something like:
    6837              : 
    6838              :          unsigned short *x, *y;
    6839              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6840              : 
    6841              :      The right shift can be done on unsigned chars, and only requires the
    6842              :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6843              :      approach would mean turning a natural chain of single-vector unsigned
    6844              :      short operations into one that truncates "*x" and then extends
    6845              :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6846              :      operation and one vector for each unsigned char operation.
    6847              :      This would be a significant pessimization.
    6848              : 
    6849              :      Instead only propagate the maximum of this precision and the precision
    6850              :      required by the users of the result.  This means that we don't pessimize
    6851              :      the case above but continue to optimize things like:
    6852              : 
    6853              :          unsigned char *y;
    6854              :          unsigned short *x;
    6855              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6856              : 
    6857              :      Here we would truncate two vectors of *x to a single vector of
    6858              :      unsigned chars and use single-vector unsigned char operations for
    6859              :      everything else, rather than doing two unsigned short copies of
    6860              :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6861     11508385 :   min_input_precision = MAX (min_input_precision,
    6862              :                              stmt_info->min_output_precision);
    6863              : 
    6864     11508385 :   if (min_input_precision < TYPE_PRECISION (type)
    6865     11508385 :       && (!stmt_info->min_input_precision
    6866        63697 :           || stmt_info->min_input_precision > min_input_precision))
    6867       552994 :     stmt_info->min_input_precision = min_input_precision;
    6868     11508385 : }
    6869              : 
    6870              : /* Subroutine of vect_determine_min_output_precision.  Return true if
    6871              :    we can calculate a reduced number of output bits for STMT_INFO,
    6872              :    whose result is LHS.  */
    6873              : 
    6874              : static bool
    6875     14663868 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6876              :                                        stmt_vec_info stmt_info, tree lhs)
    6877              : {
    6878              :   /* Take the maximum precision required by users of the result.  */
    6879     14663868 :   unsigned int precision = 0;
    6880     14663868 :   imm_use_iterator iter;
    6881     14663868 :   use_operand_p use;
    6882     30110387 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6883              :     {
    6884     15190358 :       gimple *use_stmt = USE_STMT (use);
    6885     15190358 :       if (is_gimple_debug (use_stmt))
    6886       522668 :         continue;
    6887     14667690 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6888     14667690 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6889              :         return false;
    6890              :       /* The input precision recorded for COND_EXPRs applies only to the
    6891              :          "then" and "else" values.  */
    6892       260543 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6893       228190 :       if (assign
    6894       228190 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6895          560 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6896          560 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6897              :         return false;
    6898       784336 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6899     14407707 :     }
    6900              : 
    6901       256161 :   if (dump_enabled_p ())
    6902         5836 :     dump_printf_loc (MSG_NOTE, vect_location,
    6903              :                      "only the low %d bits of %T are significant\n",
    6904              :                      precision, lhs);
    6905       256161 :   stmt_info->min_output_precision = precision;
    6906       256161 :   return true;
    6907              : }
    6908              : 
    6909              : /* Calculate min_output_precision for STMT_INFO.  */
    6910              : 
    6911              : static void
    6912     37266275 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6913              : {
    6914              :   /* We're only interested in statements with a narrowable result.  */
    6915     37266275 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6916     37266275 :   if (!lhs
    6917     29085507 :       || TREE_CODE (lhs) != SSA_NAME
    6918     61785353 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6919              :     return;
    6920              : 
    6921     14663868 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6922     14407707 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6923              : }
    6924              : 
    6925              : /* Use range information to decide whether STMT (described by STMT_INFO)
    6926              :    could be done in a narrower type.  This is effectively a forward
    6927              :    propagation, since it uses context-independent information that applies
    6928              :    to all users of an SSA name.  */
    6929              : 
    6930              : static void
    6931     20850485 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6932              : {
    6933     20850485 :   tree lhs = gimple_assign_lhs (stmt);
    6934     20850485 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6935     18450275 :     return;
    6936              : 
    6937     16557864 :   tree type = TREE_TYPE (lhs);
    6938     16557864 :   if (!vect_narrowable_type_p (type))
    6939              :     return;
    6940              : 
    6941              :   /* First see whether we have any useful range information for the result.  */
    6942     11259385 :   unsigned int precision = TYPE_PRECISION (type);
    6943     11259385 :   signop sign = TYPE_SIGN (type);
    6944     11259385 :   wide_int min_value, max_value;
    6945     11259385 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6946              :     return;
    6947              : 
    6948      5521115 :   tree_code code = gimple_assign_rhs_code (stmt);
    6949      5521115 :   unsigned int nops = gimple_num_ops (stmt);
    6950              : 
    6951      5521115 :   if (!vect_truncatable_operation_p (code))
    6952              :     {
    6953              :       /* Handle operations that can be computed in type T if all inputs
    6954              :          and outputs can be represented in type T.  Also handle left and
    6955              :          right shifts, where (in addition) the maximum shift amount must
    6956              :          be less than the number of bits in T.  */
    6957      1987820 :       bool is_shift;
    6958      1987820 :       switch (code)
    6959              :         {
    6960              :         case LSHIFT_EXPR:
    6961              :         case RSHIFT_EXPR:
    6962              :           is_shift = true;
    6963              :           break;
    6964              : 
    6965       266881 :         case ABS_EXPR:
    6966       266881 :         case MIN_EXPR:
    6967       266881 :         case MAX_EXPR:
    6968       266881 :         case TRUNC_DIV_EXPR:
    6969       266881 :         case CEIL_DIV_EXPR:
    6970       266881 :         case FLOOR_DIV_EXPR:
    6971       266881 :         case ROUND_DIV_EXPR:
    6972       266881 :         case EXACT_DIV_EXPR:
    6973              :           /* Modulus is excluded because it is typically calculated by doing
    6974              :              a division, for which minimum signed / -1 isn't representable in
    6975              :              the original signed type.  We could take the division range into
    6976              :              account instead, if handling modulus ever becomes important.  */
    6977       266881 :           is_shift = false;
    6978       266881 :           break;
    6979              : 
    6980              :         default:
    6981              :           return;
    6982              :         }
    6983      1305787 :       for (unsigned int i = 1; i < nops; ++i)
    6984              :         {
    6985      1009675 :           tree op = gimple_op (stmt, i);
    6986      1009675 :           wide_int op_min_value, op_max_value;
    6987      1009675 :           if (TREE_CODE (op) == INTEGER_CST)
    6988              :             {
    6989       300103 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6990       300103 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6991              :             }
    6992       709572 :           else if (TREE_CODE (op) == SSA_NAME)
    6993              :             {
    6994       709572 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    6995              :                 return;
    6996              :             }
    6997              :           else
    6998              :             return;
    6999              : 
    7000       656061 :           if (is_shift && i == 2)
    7001              :             {
    7002              :               /* There needs to be one more bit than the maximum shift amount.
    7003              : 
    7004              :                  If the maximum shift amount is already 1 less than PRECISION
    7005              :                  then we can't narrow the shift further.  Dealing with that
    7006              :                  case first ensures that we can safely use an unsigned range
    7007              :                  below.
    7008              : 
    7009              :                  op_min_value isn't relevant, since shifts by negative amounts
    7010              :                  are UB.  */
    7011       198497 :               if (wi::geu_p (op_max_value, precision - 1))
    7012              :                 return;
    7013       179369 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    7014              : 
    7015              :               /* As explained below, we can convert a signed shift into an
    7016              :                  unsigned shift if the sign bit is always clear.  At this
    7017              :                  point we've already processed the ranges of the output and
    7018              :                  the first input.  */
    7019       179369 :               auto op_sign = sign;
    7020       179369 :               if (sign == SIGNED && !wi::neg_p (min_value))
    7021              :                 op_sign = UNSIGNED;
    7022       358738 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    7023       179369 :                                              precision, op_sign);
    7024       358738 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    7025       179369 :                                              precision, op_sign);
    7026              :             }
    7027       636933 :           min_value = wi::min (min_value, op_min_value, sign);
    7028       636933 :           max_value = wi::max (max_value, op_max_value, sign);
    7029      1009675 :         }
    7030              :     }
    7031              : 
    7032              :   /* Try to switch signed types for unsigned types if we can.
    7033              :      This is better for two reasons.  First, unsigned ops tend
    7034              :      to be cheaper than signed ops.  Second, it means that we can
    7035              :      handle things like:
    7036              : 
    7037              :         signed char c;
    7038              :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    7039              : 
    7040              :      as:
    7041              : 
    7042              :         signed char c;
    7043              :         unsigned short res_1 = (unsigned short) c & 0xff00;
    7044              :         int res = (int) res_1;
    7045              : 
    7046              :      where the intermediate result res_1 has unsigned rather than
    7047              :      signed type.  */
    7048      3829407 :   if (sign == SIGNED && !wi::neg_p (min_value))
    7049              :     sign = UNSIGNED;
    7050              : 
    7051              :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    7052      3829407 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    7053      3829407 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    7054      3829407 :   unsigned int value_precision = MAX (precision1, precision2);
    7055      3829407 :   if (value_precision >= precision)
    7056              :     return;
    7057              : 
    7058      2400210 :   if (dump_enabled_p ())
    7059       110187 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7060              :                      " without loss of precision: %G",
    7061              :                      sign == SIGNED ? "signed" : "unsigned",
    7062              :                      value_precision, (gimple *) stmt);
    7063              : 
    7064      2400210 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    7065      2400210 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    7066     11259385 : }
    7067              : 
    7068              : /* Use information about the users of STMT's result to decide whether
    7069              :    STMT (described by STMT_INFO) could be done in a narrower type.
    7070              :    This is effectively a backward propagation.  */
    7071              : 
    7072              : static void
    7073     20850485 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    7074              : {
    7075     20850485 :   tree_code code = gimple_assign_rhs_code (stmt);
    7076     20850485 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    7077     20850485 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    7078     20850485 :   if (!vect_narrowable_type_p (type))
    7079     11742310 :     return;
    7080              : 
    7081     13180245 :   unsigned int precision = TYPE_PRECISION (type);
    7082     13180245 :   unsigned int operation_precision, min_input_precision;
    7083     13180245 :   switch (code)
    7084              :     {
    7085      2385201 :     CASE_CONVERT:
    7086              :       /* Only the bits that contribute to the output matter.  Don't change
    7087              :          the precision of the operation itself.  */
    7088      2385201 :       operation_precision = precision;
    7089      2385201 :       min_input_precision = stmt_info->min_output_precision;
    7090      2385201 :       break;
    7091              : 
    7092       471174 :     case LSHIFT_EXPR:
    7093       471174 :     case RSHIFT_EXPR:
    7094       471174 :       {
    7095       471174 :         tree shift = gimple_assign_rhs2 (stmt);
    7096       471174 :         unsigned int min_const_shift, max_const_shift;
    7097       471174 :         wide_int min_shift, max_shift;
    7098       471174 :         if (TREE_CODE (shift) == SSA_NAME
    7099       105802 :             && vect_get_range_info (shift, &min_shift, &max_shift)
    7100        81490 :             && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
    7101       549813 :             && wi::lt_p (max_shift, TYPE_PRECISION (type),
    7102        78639 :                          TYPE_SIGN (TREE_TYPE (shift))))
    7103              :           {
    7104        70556 :             min_const_shift = min_shift.to_uhwi ();
    7105        70556 :             max_const_shift = max_shift.to_uhwi ();
    7106              :           }
    7107       400618 :         else if (TREE_CODE (shift) == INTEGER_CST
    7108       765990 :                  && wi::ltu_p (wi::to_widest (shift), precision))
    7109       365264 :           min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
    7110              :         else
    7111        35354 :           return;
    7112       435820 :         if (code == LSHIFT_EXPR)
    7113              :           {
    7114              :             /* Avoid creating an undefined shift.
    7115              : 
    7116              :                ??? We could instead use min_output_precision as-is and
    7117              :                optimize out-of-range shifts to zero.  However, only
    7118              :                degenerate testcases shift away all their useful input data,
    7119              :                and it isn't natural to drop input operations in the middle
    7120              :                of vectorization.  This sort of thing should really be
    7121              :                handled before vectorization.  */
    7122       108686 :             operation_precision = MAX (stmt_info->min_output_precision,
    7123              :                                        max_const_shift + 1);
    7124              :             /* We need CONST_SHIFT fewer bits of the input.  */
    7125       108686 :             min_input_precision = (MAX (operation_precision, max_const_shift)
    7126              :                                    - min_const_shift);
    7127              :           }
    7128              :         else
    7129              :           {
    7130              :             /* We need CONST_SHIFT extra bits to do the operation.  */
    7131       327134 :             operation_precision = (stmt_info->min_output_precision
    7132              :                                    + max_const_shift);
    7133       327134 :             min_input_precision = operation_precision;
    7134              :           }
    7135       435820 :         break;
    7136       471174 :       }
    7137              : 
    7138     10323870 :     default:
    7139     10323870 :       if (vect_truncatable_operation_p (code))
    7140              :         {
    7141              :           /* Input bit N has no effect on output bits N-1 and lower.  */
    7142      6287154 :           operation_precision = stmt_info->min_output_precision;
    7143      6287154 :           min_input_precision = operation_precision;
    7144      6287154 :           break;
    7145              :         }
    7146              :       return;
    7147              :     }
    7148              : 
    7149      9108175 :   if (operation_precision < precision)
    7150              :     {
    7151       106797 :       if (dump_enabled_p ())
    7152         2784 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7153              :                          " without affecting users: %G",
    7154         2784 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    7155              :                          operation_precision, (gimple *) stmt);
    7156       213594 :       vect_set_operation_type (stmt_info, type, operation_precision,
    7157       106797 :                                TYPE_SIGN (type));
    7158              :     }
    7159      9108175 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    7160              : }
    7161              : 
    7162              : /* Return true if the statement described by STMT_INFO sets a boolean
    7163              :    SSA_NAME and if we know how to vectorize this kind of statement using
    7164              :    vector mask types.  */
    7165              : 
    7166              : static bool
    7167     38350357 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    7168              : {
    7169     38350357 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    7170     38350357 :   tree_code code = ERROR_MARK;
    7171     38350357 :   gassign *assign = NULL;
    7172     38350357 :   gcond *cond = NULL;
    7173              : 
    7174     38350357 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    7175     21624566 :     code = gimple_assign_rhs_code (assign);
    7176     16725791 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    7177              :     {
    7178      5219316 :       lhs = gimple_cond_lhs (cond);
    7179      5219316 :       code = gimple_cond_code (cond);
    7180              :     }
    7181              : 
    7182     38350357 :   if (!lhs
    7183     35295214 :       || TREE_CODE (lhs) != SSA_NAME
    7184     69040783 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    7185              :     return false;
    7186              : 
    7187      2092025 :   if (code != ERROR_MARK)
    7188              :     {
    7189      1841601 :       switch (code)
    7190              :         {
    7191              :         CASE_CONVERT:
    7192              :         case SSA_NAME:
    7193              :         case BIT_NOT_EXPR:
    7194              :         case BIT_IOR_EXPR:
    7195              :         case BIT_XOR_EXPR:
    7196              :         case BIT_AND_EXPR:
    7197              :           return true;
    7198              : 
    7199      1445970 :         default:
    7200      1445970 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    7201              :         }
    7202              :     }
    7203       250424 :   else if (is_a <gphi *> (stmt_info->stmt))
    7204       142538 :     return true;
    7205              :   return false;
    7206              : }
    7207              : 
    7208              : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    7209              :    a vector mask type instead of a normal vector type.  Record the
    7210              :    result in STMT_INFO->mask_precision.  Returns true when the
    7211              :    precision changed.  */
    7212              : 
    7213              : static bool
    7214     38350357 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    7215              : {
    7216     38350357 :   if (!possible_vector_mask_operation_p (stmt_info))
    7217              :     return false;
    7218              : 
    7219              :   /* If at least one boolean input uses a vector mask type,
    7220              :      pick the mask type with the narrowest elements.
    7221              : 
    7222              :      ??? This is the traditional behavior.  It should always produce
    7223              :      the smallest number of operations, but isn't necessarily the
    7224              :      optimal choice.  For example, if we have:
    7225              : 
    7226              :        a = b & c
    7227              : 
    7228              :      where:
    7229              : 
    7230              :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    7231              :        - b also uses M16
    7232              :        - c uses a mask type for 8-bit elements (M8)
    7233              : 
    7234              :      then picking M8 gives:
    7235              : 
    7236              :        - 1 M16->M8 pack for b
    7237              :        - 1 M8 AND for a
    7238              :        - 2 M8->M16 unpacks for the user of a
    7239              : 
    7240              :      whereas picking M16 would have given:
    7241              : 
    7242              :        - 2 M8->M16 unpacks for c
    7243              :        - 2 M16 ANDs for a
    7244              : 
    7245              :      The number of operations are equal, but M16 would have given
    7246              :      a shorter dependency chain and allowed more ILP.  */
    7247      1933714 :   unsigned int precision = ~0U;
    7248      1933714 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7249              : 
    7250              :   /* If the statement compares two values that shouldn't use vector masks,
    7251              :      try comparing the values as normal scalars instead.  */
    7252      1933714 :   tree_code code = ERROR_MARK;
    7253      1933714 :   tree op0_type;
    7254      1933714 :   unsigned int nops = -1;
    7255      1933714 :   unsigned int ops_start = 0;
    7256              : 
    7257      1933714 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    7258              :     {
    7259      1260256 :       code = gimple_assign_rhs_code (assign);
    7260      1260256 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    7261      1260256 :       nops = gimple_num_ops (assign);
    7262      1260256 :       ops_start = 1;
    7263              :     }
    7264       673458 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    7265              :     {
    7266       530920 :       code = gimple_cond_code (cond);
    7267       530920 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    7268       530920 :       nops = 2;
    7269       530920 :       ops_start = 0;
    7270              :     }
    7271              : 
    7272      1791176 :   if (code != ERROR_MARK)
    7273              :     {
    7274      5331323 :       for (unsigned int i = ops_start; i < nops; ++i)
    7275              :         {
    7276      3540147 :           tree rhs = gimple_op (stmt, i);
    7277      3540147 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    7278      1739802 :             continue;
    7279              : 
    7280      1800345 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7281      1800345 :           if (!def_stmt_info)
    7282              :             /* Don't let external or constant operands influence the choice.
    7283              :                We can convert them to whichever vector type we pick.  */
    7284       545514 :             continue;
    7285              : 
    7286      1254831 :           if (def_stmt_info->mask_precision)
    7287              :             {
    7288      1037026 :               if (precision > def_stmt_info->mask_precision)
    7289      3540147 :                 precision = def_stmt_info->mask_precision;
    7290              :             }
    7291              :         }
    7292              : 
    7293      1791176 :       if (precision == ~0U
    7294      1456808 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    7295              :         {
    7296      1260624 :           scalar_mode mode;
    7297      1260624 :           tree vectype, mask_type;
    7298      1260624 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    7299              :               /* Do not allow this to set vinfo->vector_mode, this might
    7300              :                  disrupt the result for the next iteration.  */
    7301      1260624 :               && (vectype = get_related_vectype_for_scalar_type
    7302      1510627 :                                                 (vinfo->vector_mode, op0_type))
    7303      1101722 :               && (mask_type = truth_type_for (vectype))
    7304      1101722 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    7305      1703438 :             precision = GET_MODE_BITSIZE (mode);
    7306              :         }
    7307              :     }
    7308              :   else
    7309              :     {
    7310       142538 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    7311       577492 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    7312              :         {
    7313       434954 :           tree rhs = gimple_phi_arg_def (phi, i);
    7314              : 
    7315       434954 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7316       434954 :           if (!def_stmt_info)
    7317              :             /* Don't let external or constant operands influence the choice.
    7318              :                We can convert them to whichever vector type we pick.  */
    7319       279335 :             continue;
    7320              : 
    7321       155619 :           if (def_stmt_info->mask_precision)
    7322              :             {
    7323       130258 :               if (precision > def_stmt_info->mask_precision)
    7324       434954 :                 precision = def_stmt_info->mask_precision;
    7325              :             }
    7326              :         }
    7327              :     }
    7328              : 
    7329      1933714 :   if (stmt_info->mask_precision != precision)
    7330              :     {
    7331      1805850 :       if (dump_enabled_p ())
    7332              :         {
    7333         7989 :           if (precision == ~0U)
    7334         1879 :             dump_printf_loc (MSG_NOTE, vect_location,
    7335              :                              "using normal nonmask vectors for %G",
    7336              :                              stmt_info->stmt);
    7337              :           else
    7338         6110 :             dump_printf_loc (MSG_NOTE, vect_location,
    7339              :                              "using boolean precision %d for %G",
    7340              :                              precision, stmt_info->stmt);
    7341              :         }
    7342              : 
    7343              :       /* ???  We'd like to assert stmt_info->mask_precision == 0
    7344              :          || stmt_info->mask_precision > precision, thus that we only
    7345              :          decrease mask precisions throughout iteration, but the
    7346              :          tcc_comparison handling above means for comparisons of bools
    7347              :          we start with 8 but might increase in case the bools get mask
    7348              :          precision on their own.  */
    7349      1805850 :       stmt_info->mask_precision = precision;
    7350      1805850 :       return true;
    7351              :     }
    7352              :   return false;
    7353              : }
    7354              : 
    7355              : /* Handle vect_determine_precisions for STMT_INFO, given that we
    7356              :    have already done so for the users of its result.  */
    7357              : 
    7358              : void
    7359     37266275 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    7360              : {
    7361     37266275 :   vect_determine_min_output_precision (vinfo, stmt_info);
    7362     37266275 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    7363              :     {
    7364     20850485 :       vect_determine_precisions_from_range (stmt_info, stmt);
    7365     20850485 :       vect_determine_precisions_from_users (stmt_info, stmt);
    7366              :     }
    7367     37266275 : }
    7368              : 
    7369              : /* Walk backwards through the vectorizable region to determine the
    7370              :    values of these fields:
    7371              : 
    7372              :    - min_output_precision
    7373              :    - min_input_precision
    7374              :    - operation_precision
    7375              :    - operation_sign.  */
    7376              : 
    7377              : void
    7378      1047851 : vect_determine_precisions (vec_info *vinfo)
    7379              : {
    7380      1047851 :   basic_block *bbs = vinfo->bbs;
    7381      1047851 :   unsigned int nbbs = vinfo->nbbs;
    7382              : 
    7383      1067247 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    7384              : 
    7385              :   /* For mask precisions we have to iterate since otherwise we do not
    7386              :      get reduction PHI precision correct.  For now do this only for
    7387              :      loop vectorization.  */
    7388      1117867 :   bool changed;
    7389      1117867 :   do
    7390              :     {
    7391      1117867 :       changed = false;
    7392     12645645 :       for (unsigned int i = 0; i < nbbs; i++)
    7393              :         {
    7394     11527778 :           basic_block bb = bbs[i];
    7395     11527778 :           for (auto gsi = gsi_start_phis (bb);
    7396     18644055 :                !gsi_end_p (gsi); gsi_next (&gsi))
    7397              :             {
    7398      7116277 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7399      7116277 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7400      6942872 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7401              :             }
    7402    119072810 :           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7403              :             {
    7404     96017254 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7405     96017254 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7406     31407485 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7407              :             }
    7408              :         }
    7409              :     }
    7410      2165718 :   while (changed && is_a <loop_vec_info> (vinfo));
    7411              : 
    7412     12414278 :   for (unsigned int i = 0; i < nbbs; i++)
    7413              :     {
    7414     11366427 :       basic_block bb = bbs[nbbs - i - 1];
    7415    211571648 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    7416              :         {
    7417     94419397 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7418     94419397 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7419     30526219 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7420              :         }
    7421     18279888 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7422              :         {
    7423      6913461 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7424      6913461 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7425      6740056 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7426              :         }
    7427              :     }
    7428      1047851 : }
    7429              : 
    7430              : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    7431              : 
    7432              : struct vect_recog_func
    7433              : {
    7434              :   vect_recog_func_ptr fn;
    7435              :   const char *name;
    7436              : };
    7437              : 
    7438              : /* Note that ordering matters - the first pattern matching on a stmt is
    7439              :    taken which means usually the more complex one needs to preceed the
    7440              :    less comples onex (widen_sum only after dot_prod or sad for example).  */
    7441              : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    7442              :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    7443              :   { vect_recog_bit_insert_pattern, "bit_insert" },
    7444              :   { vect_recog_abd_pattern, "abd" },
    7445              :   { vect_recog_over_widening_pattern, "over_widening" },
    7446              :   /* Must come after over_widening, which narrows the shift as much as
    7447              :      possible beforehand.  */
    7448              :   { vect_recog_average_pattern, "average" },
    7449              :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    7450              :   { vect_recog_mulhs_pattern, "mult_high" },
    7451              :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    7452              :   { vect_recog_widen_mult_pattern, "widen_mult" },
    7453              :   { vect_recog_dot_prod_pattern, "dot_prod" },
    7454              :   { vect_recog_sad_pattern, "sad" },
    7455              :   { vect_recog_widen_sum_pattern, "widen_sum" },
    7456              :   { vect_recog_pow_pattern, "pow" },
    7457              :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    7458              :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    7459              :   { vect_recog_widen_shift_pattern, "widen_shift" },
    7460              :   { vect_recog_rotate_pattern, "rotate" },
    7461              :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    7462              :   { vect_recog_divmod_pattern, "divmod" },
    7463              :   { vect_recog_mod_var_pattern, "modvar" },
    7464              :   { vect_recog_mult_pattern, "mult" },
    7465              :   { vect_recog_sat_add_pattern, "sat_add" },
    7466              :   { vect_recog_sat_sub_pattern, "sat_sub" },
    7467              :   { vect_recog_sat_trunc_pattern, "sat_trunc" },
    7468              :   { vect_recog_gcond_pattern, "gcond" },
    7469              :   { vect_recog_bool_pattern, "bool" },
    7470              :   /* This must come before mask conversion, and includes the parts
    7471              :      of mask conversion that are needed for gather and scatter
    7472              :      internal functions.  */
    7473              :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    7474              :   { vect_recog_cond_store_pattern, "cond_store" },
    7475              :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    7476              :   { vect_recog_widen_plus_pattern, "widen_plus" },
    7477              :   { vect_recog_widen_minus_pattern, "widen_minus" },
    7478              :   { vect_recog_widen_abd_pattern, "widen_abd" },
    7479              :   /* These must come after the double widening ones.  */
    7480              : };
    7481              : 
    7482              : /* Mark statements that are involved in a pattern.  */
    7483              : 
    7484              : void
    7485       992724 : vect_mark_pattern_stmts (vec_info *vinfo,
    7486              :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    7487              :                          tree pattern_vectype)
    7488              : {
    7489       992724 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    7490       992724 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7491              : 
    7492       992724 :   gimple *orig_pattern_stmt = NULL;
    7493       992724 :   if (is_pattern_stmt_p (orig_stmt_info))
    7494              :     {
    7495              :       /* We're replacing a statement in an existing pattern definition
    7496              :          sequence.  */
    7497        10949 :       orig_pattern_stmt = orig_stmt_info->stmt;
    7498        10949 :       if (dump_enabled_p ())
    7499          664 :         dump_printf_loc (MSG_NOTE, vect_location,
    7500              :                          "replacing earlier pattern %G", orig_pattern_stmt);
    7501              : 
    7502              :       /* To keep the book-keeping simple, just swap the lhs of the
    7503              :          old and new statements, so that the old one has a valid but
    7504              :          unused lhs.  */
    7505        10949 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    7506        10949 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    7507        10949 :       gimple_set_lhs (pattern_stmt, old_lhs);
    7508              : 
    7509        10949 :       if (dump_enabled_p ())
    7510          664 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    7511              : 
    7512              :       /* Switch to the statement that ORIG replaces.  */
    7513        10949 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    7514              : 
    7515              :       /* We shouldn't be replacing the main pattern statement.  */
    7516        10949 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    7517              :                   != orig_pattern_stmt);
    7518              :     }
    7519              : 
    7520       992724 :   if (def_seq)
    7521              :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    7522      2183253 :          !gsi_end_p (si); gsi_next (&si))
    7523              :       {
    7524      1316479 :         if (dump_enabled_p ())
    7525        24566 :           dump_printf_loc (MSG_NOTE, vect_location,
    7526              :                            "extra pattern stmt: %G", gsi_stmt (si));
    7527      1316479 :         stmt_vec_info pattern_stmt_info
    7528      1316479 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    7529              :                                     orig_stmt_info, pattern_vectype);
    7530              :         /* Stmts in the def sequence are not vectorizable cycle or
    7531              :            induction defs, instead they should all be vect_internal_def
    7532              :            feeding the main pattern stmt which retains this def type.  */
    7533      1316479 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    7534              :       }
    7535              : 
    7536       992724 :   if (orig_pattern_stmt)
    7537              :     {
    7538        10949 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    7539              :                               orig_stmt_info, pattern_vectype);
    7540              : 
    7541              :       /* Insert all the new pattern statements before the original one.  */
    7542        10949 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7543        10949 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    7544              :                                                orig_def_seq);
    7545        10949 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    7546        10949 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    7547              : 
    7548              :       /* Remove the pattern statement that this new pattern replaces.  */
    7549        10949 :       gsi_remove (&gsi, false);
    7550              :     }
    7551              :   else
    7552       981775 :     vect_set_pattern_stmt (vinfo,
    7553              :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    7554              : 
    7555              :   /* For any conditionals mark them as vect_condition_def.  */
    7556       992724 :   if (is_a <gcond *> (pattern_stmt))
    7557       358949 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    7558              : 
    7559              :   /* Transfer reduction path info to the pattern.  */
    7560       992724 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    7561              :     {
    7562        15978 :       gimple_match_op op;
    7563        15978 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    7564            0 :         gcc_unreachable ();
    7565        15978 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    7566              :       /* Search the pattern def sequence and the main pattern stmt.  Note
    7567              :          we may have inserted all into a containing pattern def sequence
    7568              :          so the following is a bit awkward.  */
    7569        15978 :       gimple_stmt_iterator si;
    7570        15978 :       gimple *s;
    7571        15978 :       if (def_seq)
    7572              :         {
    7573        14859 :           si = gsi_start (def_seq);
    7574        14859 :           s = gsi_stmt (si);
    7575        14859 :           gsi_next (&si);
    7576              :         }
    7577              :       else
    7578              :         {
    7579              :           si = gsi_none ();
    7580              :           s = pattern_stmt;
    7581              :         }
    7582        33641 :       do
    7583              :         {
    7584        33641 :           bool found = false;
    7585        33641 :           if (gimple_extract_op (s, &op))
    7586              :             {
    7587        81899 :               for (unsigned i = 0; i < op.num_ops; ++i)
    7588        64236 :                 if (op.ops[i] == lookfor)
    7589              :                   {
    7590        15978 :                     STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7591        15978 :                     lookfor = gimple_get_lhs (s);
    7592        15978 :                     found = true;
    7593        15978 :                     break;
    7594              :                   }
    7595              :               /* Try harder to find a mid-entry into an earlier pattern
    7596              :                  sequence.  Likewise an entry to a stmt skipping a conversion
    7597              :                  on an input.  This means that the initial 'lookfor' was
    7598              :                  bogus.  */
    7599        15978 :               if (!found)
    7600              :                 {
    7601        38322 :                   for (unsigned i = 0; i < op.num_ops; ++i)
    7602        20659 :                     if (TREE_CODE (op.ops[i]) == SSA_NAME)
    7603        17663 :                       if (auto def = vinfo->lookup_def (op.ops[i]))
    7604        17472 :                         if (vect_is_reduction (def)
    7605        17472 :                             || (is_a <gphi *> (def->stmt)
    7606            0 :                                 && STMT_VINFO_REDUC_DEF (def) != NULL))
    7607              :                           {
    7608            0 :                             STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7609            0 :                             lookfor = gimple_get_lhs (s);
    7610            0 :                             found = true;
    7611            0 :                             break;
    7612              :                           }
    7613              :                 }
    7614              :             }
    7615        33641 :           if (s == pattern_stmt)
    7616              :             {
    7617        15978 :               if (!found && dump_enabled_p ())
    7618            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7619              :                                  "failed to update reduction index.\n");
    7620        15978 :               break;
    7621              :             }
    7622        17663 :           if (gsi_end_p (si))
    7623              :             s = pattern_stmt;
    7624              :           else
    7625              :             {
    7626         2804 :               s = gsi_stmt (si);
    7627         2804 :               if (s == pattern_stmt)
    7628              :                 /* Found the end inside a bigger pattern def seq.  */
    7629              :                 si = gsi_none ();
    7630              :               else
    7631         2804 :                 gsi_next (&si);
    7632              :             }
    7633              :         } while (1);
    7634              :     }
    7635       992724 : }
    7636              : 
    7637              : /* Function vect_pattern_recog_1
    7638              : 
    7639              :    Input:
    7640              :    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
    7641              :         computation pattern.
    7642              :    STMT_INFO: A stmt from which the pattern search should start.
    7643              : 
    7644              :    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
    7645              :    a sequence of statements that has the same functionality and can be
    7646              :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7647              :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7648              :    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
    7649              :    statement, having first checked that the target supports the new operation
    7650              :    in that type.
    7651              : 
    7652              :    This function also does some bookkeeping, as explained in the documentation
    7653              :    for vect_recog_pattern.  */
    7654              : 
    7655              : static void
    7656    994570280 : vect_pattern_recog_1 (vec_info *vinfo,
    7657              :                       const vect_recog_func &recog_func, stmt_vec_info stmt_info)
    7658              : {
    7659    994570280 :   gimple *pattern_stmt;
    7660    994570280 :   tree pattern_vectype;
    7661              : 
    7662              :   /* If this statement has already been replaced with pattern statements,
    7663              :      leave the original statement alone, since the first match wins.
    7664              :      Instead try to match against the definition statements that feed
    7665              :      the main pattern statement.  */
    7666    994570280 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7667              :     {
    7668     12534570 :       gimple_stmt_iterator gsi;
    7669     12534570 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7670     30265842 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7671     17731272 :         vect_pattern_recog_1 (vinfo, recog_func,
    7672              :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7673              :       return;
    7674              :     }
    7675              : 
    7676    982035710 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7677    982035710 :   pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
    7678    982035710 :   if (!pattern_stmt)
    7679              :     {
    7680              :       /* Clear any half-formed pattern definition sequence.  */
    7681    981042986 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7682    981042986 :       return;
    7683              :     }
    7684              : 
    7685              :   /* Found a vectorizable pattern.  */
    7686       992724 :   if (dump_enabled_p ())
    7687        18730 :     dump_printf_loc (MSG_NOTE, vect_location,
    7688              :                      "%s pattern recognized: %G",
    7689        18730 :                      recog_func.name, pattern_stmt);
    7690              : 
    7691              :   /* Mark the stmts that are involved in the pattern. */
    7692       992724 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7693              : }
    7694              : 
    7695              : 
    7696              : /* Function vect_pattern_recog
    7697              : 
    7698              :    Input:
    7699              :    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
    7700              :         computation idioms.
    7701              : 
    7702              :    Output - for each computation idiom that is detected we create a new stmt
    7703              :         that provides the same functionality and that can be vectorized.  We
    7704              :         also record some information in the struct_stmt_info of the relevant
    7705              :         stmts, as explained below:
    7706              : 
    7707              :    At the entry to this function we have the following stmts, with the
    7708              :    following initial value in the STMT_VINFO fields:
    7709              : 
    7710              :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7711              :          S1: a_i = ....                 -       -               -
    7712              :          S2: a_2 = ..use(a_i)..         -       -               -
    7713              :          S3: a_1 = ..use(a_2)..         -       -               -
    7714              :          S4: a_0 = ..use(a_1)..         -       -               -
    7715              :          S5: ... = ..use(a_0)..         -       -               -
    7716              : 
    7717              :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7718              :    represented by a single stmt.  We then:
    7719              :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7720              :      inserted into the code)
    7721              :    - fill in the STMT_VINFO fields as follows:
    7722              : 
    7723              :                                   in_pattern_p  related_stmt    vec_stmt
    7724              :          S1: a_i = ....                 -       -               -
    7725              :          S2: a_2 = ..use(a_i)..         -       -               -
    7726              :          S3: a_1 = ..use(a_2)..         -       -               -
    7727              :          S4: a_0 = ..use(a_1)..         true    S6              -
    7728              :           '---> S6: a_new = ....        -       S4              -
    7729              :          S5: ... = ..use(a_0)..         -       -               -
    7730              : 
    7731              :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7732              :    to each other through the RELATED_STMT field).
    7733              : 
    7734              :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7735              :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7736              :    remain irrelevant unless used by stmts other than S4.
    7737              : 
    7738              :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7739              :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7740              :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7741              :    S4 will be skipped, and S5 will be vectorized as usual:
    7742              : 
    7743              :                                   in_pattern_p  related_stmt    vec_stmt
    7744              :          S1: a_i = ....                 -       -               -
    7745              :          S2: a_2 = ..use(a_i)..         -       -               -
    7746              :          S3: a_1 = ..use(a_2)..         -       -               -
    7747              :        > VS6: va_new = ....             -       -               -
    7748              :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7749              :           '---> S6: a_new = ....        -       S4              VS6
    7750              :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7751              :          S5: ... = ..use(a_0)..         -       -               -
    7752              : 
    7753              :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7754              :    elsewhere), and we'll end up with:
    7755              : 
    7756              :         VS6: va_new = ....
    7757              :         VS5: ... = ..vuse(va_new)..
    7758              : 
    7759              :    In case of more than one pattern statements, e.g., widen-mult with
    7760              :    intermediate type:
    7761              : 
    7762              :      S1  a_t = ;
    7763              :      S2  a_T = (TYPE) a_t;
    7764              :            '--> S3: a_it = (interm_type) a_t;
    7765              :      S4  prod_T = a_T * CONST;
    7766              :            '--> S5: prod_T' = a_it w* CONST;
    7767              : 
    7768              :    there may be other users of a_T outside the pattern.  In that case S2 will
    7769              :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7770              :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7771              :    be recorded in S3.  */
    7772              : 
    7773              : void
    7774      1047851 : vect_pattern_recog (vec_info *vinfo)
    7775              : {
    7776      1047851 :   basic_block *bbs = vinfo->bbs;
    7777      1047851 :   unsigned int nbbs = vinfo->nbbs;
    7778              : 
    7779      1047851 :   vect_determine_precisions (vinfo);
    7780              : 
    7781      1047851 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7782              : 
    7783              :   /* Scan through the stmts in the region, applying the pattern recognition
    7784              :      functions starting at each stmt visited.  */
    7785     12414278 :   for (unsigned i = 0; i < nbbs; i++)
    7786              :     {
    7787     11366427 :       basic_block bb = bbs[i];
    7788              : 
    7789    117152251 :       for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7790              :         {
    7791     94419397 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7792              : 
    7793     94419397 :           if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
    7794     63893178 :             continue;
    7795              : 
    7796              :           /* Scan over all generic vect_recog_xxx_pattern functions.  */
    7797   1007365227 :           for (const auto &func_ptr : vect_vect_recog_func_ptrs)
    7798    976839008 :             vect_pattern_recog_1 (vinfo, func_ptr,
    7799              :                                   stmt_info);
    7800              :         }
    7801              :     }
    7802              : 
    7803              :   /* After this no more add_stmt calls are allowed.  */
    7804      1047851 :   vinfo->stmt_vec_info_ro = true;
    7805      1047851 : }
    7806              : 
    7807              : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
    7808              :    or internal_fn contained in ch, respectively.  */
    7809              : gimple *
    7810       159130 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7811              : {
    7812       159130 :   gcc_assert (op0 != NULL_TREE);
    7813       159130 :   if (ch.is_tree_code ())
    7814       159130 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7815              : 
    7816            0 :   gcc_assert (ch.is_internal_fn ());
    7817            0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7818              :                                              op1 == NULL_TREE ? 1 : 2,
    7819              :                                              op0, op1);
    7820            0 :   gimple_call_set_lhs (stmt, lhs);
    7821            0 :   return stmt;
    7822              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.