LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.8 % 3119 2771
Test Date: 2026-02-28 14:20:25 Functions: 97.6 % 85 83
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Analysis Utilities for Loop Vectorization.
       2              :    Copyright (C) 2006-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "gimple-iterator.h"
      29              : #include "gimple-fold.h"
      30              : #include "ssa.h"
      31              : #include "expmed.h"
      32              : #include "optabs-tree.h"
      33              : #include "insn-config.h"
      34              : #include "recog.h"            /* FIXME: for insn_data */
      35              : #include "fold-const.h"
      36              : #include "stor-layout.h"
      37              : #include "tree-eh.h"
      38              : #include "gimplify.h"
      39              : #include "gimple-iterator.h"
      40              : #include "gimple-fold.h"
      41              : #include "gimplify-me.h"
      42              : #include "cfgloop.h"
      43              : #include "tree-vectorizer.h"
      44              : #include "dumpfile.h"
      45              : #include "builtins.h"
      46              : #include "internal-fn.h"
      47              : #include "case-cfn-macros.h"
      48              : #include "fold-const-call.h"
      49              : #include "attribs.h"
      50              : #include "cgraph.h"
      51              : #include "omp-simd-clone.h"
      52              : #include "predict.h"
      53              : #include "tree-vector-builder.h"
      54              : #include "tree-ssa-loop-ivopts.h"
      55              : #include "vec-perm-indices.h"
      56              : #include "gimple-range.h"
      57              : #include "alias.h"
      58              : 
      59              : 
      60              : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      61              :    in the first operand.  Disentangling this is future work, the
      62              :    IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
      63              : 
      64              : 
      65              : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      66              :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      67              : 
      68              : bool
      69     11722828 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      70              : {
      71     11722828 :   int_range_max vr;
      72     11722828 :   tree vr_min, vr_max;
      73     23445656 :   get_range_query (cfun)->range_of_expr (vr, var);
      74     11722828 :   if (vr.undefined_p ())
      75           84 :     vr.set_varying (TREE_TYPE (var));
      76     11722828 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      77     11722828 :   *min_value = wi::to_wide (vr_min);
      78     11722828 :   *max_value = wi::to_wide (vr_max);
      79     11722828 :   wide_int nonzero = get_nonzero_bits (var);
      80     11722828 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      81     11722828 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      82              :                                          nonzero, sgn) == VR_RANGE)
      83              :     {
      84      5741625 :       if (dump_enabled_p ())
      85              :         {
      86        85429 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      87        85429 :           dump_printf (MSG_NOTE, " has range [");
      88        85429 :           dump_hex (MSG_NOTE, *min_value);
      89        85429 :           dump_printf (MSG_NOTE, ", ");
      90        85429 :           dump_hex (MSG_NOTE, *max_value);
      91        85429 :           dump_printf (MSG_NOTE, "]\n");
      92              :         }
      93      5741625 :       return true;
      94              :     }
      95              :   else
      96              :     {
      97      5981203 :       if (dump_enabled_p ())
      98              :         {
      99        65859 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     100        65859 :           dump_printf (MSG_NOTE, " has no range info\n");
     101              :         }
     102      5981203 :       return false;
     103              :     }
     104     11722828 : }
     105              : 
     106              : /* Report that we've found an instance of pattern PATTERN in
     107              :    statement STMT.  */
     108              : 
     109              : static void
     110      1183118 : vect_pattern_detected (const char *name, gimple *stmt)
     111              : {
     112      1183118 :   if (dump_enabled_p ())
     113        24231 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     114      1183118 : }
     115              : 
     116              : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     117              :    return the pattern statement's stmt_vec_info.  Set its vector type to
     118              :    VECTYPE if it doesn't have one already.  */
     119              : 
     120              : static stmt_vec_info
     121      2311458 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     122              :                         stmt_vec_info orig_stmt_info, tree vectype)
     123              : {
     124      2311458 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     125      2311458 :   if (pattern_stmt_info == NULL)
     126      1334008 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     127      2311458 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     128              : 
     129      2311458 :   pattern_stmt_info->pattern_stmt_p = true;
     130      2311458 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     131      2311458 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     132      2311458 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     133      2311458 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     134              :     {
     135      2273515 :       gcc_assert (!vectype
     136              :                   || is_a <gcond *> (pattern_stmt)
     137              :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     138              :                       == vect_use_mask_type_p (orig_stmt_info)));
     139      1342662 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     140      1342662 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
     141              :     }
     142      2311458 :   return pattern_stmt_info;
     143              : }
     144              : 
     145              : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     146              :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     147              :    have one already.  */
     148              : 
     149              : static void
     150       982923 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     151              :                        stmt_vec_info orig_stmt_info, tree vectype)
     152              : {
     153       982923 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     154       982923 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     155            0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     156       953931 : }
     157              : 
     158              : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     159              :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     160              :    be different from the vector type of the final pattern statement.
     161              :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     162              :    from which it was derived.  */
     163              : 
     164              : static inline void
     165      1290450 : append_pattern_def_seq (vec_info *vinfo,
     166              :                         stmt_vec_info stmt_info, gimple *new_stmt,
     167              :                         tree vectype = NULL_TREE,
     168              :                         tree scalar_type_for_mask = NULL_TREE)
     169              : {
     170      1962607 :   gcc_assert (!scalar_type_for_mask
     171              :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
     172      1290450 :   if (vectype)
     173              :     {
     174       969035 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     175       969035 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     176       969035 :       if (scalar_type_for_mask)
     177       618293 :         new_stmt_info->mask_precision
     178      1236586 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     179              :     }
     180      1290450 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     181              :                                       new_stmt);
     182      1290450 : }
     183              : 
     184              : 
     185              : /* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
     186              :    statements are not vectorized but are materialized as scalar in the loop
     187              :    preheader.  */
     188              : 
     189              : static inline void
     190         1325 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
     191              : {
     192         1325 :   gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
     193              : }
     194              : 
     195              : /* The caller wants to perform new operations on vect_external variable
     196              :    VAR, so that the result of the operations would also be vect_external.
     197              :    Return the edge on which the operations can be performed, if one exists.
     198              :    Return null if the operations should instead be treated as part of
     199              :    the pattern that needs them.  */
     200              : 
     201              : static edge
     202         8440 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     203              : {
     204         8440 :   edge e = NULL;
     205         8440 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     206              :     {
     207          729 :       e = loop_preheader_edge (loop_vinfo->loop);
     208          729 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))
     209              :         {
     210          542 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     211          542 :           if (bb == NULL
     212          542 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     213              :             e = NULL;
     214              :         }
     215              :     }
     216         8440 :   return e;
     217              : }
     218              : 
     219              : /* Return true if the target supports a vector version of CODE,
     220              :    where CODE is known to map to a direct optab with the given SUBTYPE.
     221              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     222              :    specifies the type of the scalar result.
     223              : 
     224              :    If CODE allows the inputs and outputs to have different type
     225              :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     226              :    than the output mode that determines the appropriate target pattern.
     227              :    Operand 0 of the target pattern then specifies the mode that the output
     228              :    must have.
     229              : 
     230              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     231              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     232              :    is nonnull.  */
     233              : 
     234              : static bool
     235          433 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     236              :                                  tree itype, tree *vecotype_out,
     237              :                                  tree *vecitype_out = NULL,
     238              :                                  enum optab_subtype subtype = optab_default)
     239              : {
     240          433 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     241          433 :   if (!vecitype)
     242              :     return false;
     243              : 
     244          433 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     245          433 :   if (!vecotype)
     246              :     return false;
     247              : 
     248          433 :   optab optab = optab_for_tree_code (code, vecitype, subtype);
     249          433 :   if (!optab)
     250              :     return false;
     251              : 
     252          433 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
     253          433 :   if (icode == CODE_FOR_nothing
     254          433 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
     255          160 :     return false;
     256              : 
     257          273 :   *vecotype_out = vecotype;
     258          273 :   if (vecitype_out)
     259          266 :     *vecitype_out = vecitype;
     260              :   return true;
     261              : }
     262              : 
     263              : /* Return true if the target supports a vector version of CODE,
     264              :    where CODE is known to map to a conversion optab with the given SUBTYPE.
     265              :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     266              :    specifies the type of the scalar result.
     267              : 
     268              :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     269              :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     270              :    is nonnull.  */
     271              : 
     272              : static bool
     273         2698 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
     274              :                                  tree itype, tree *vecotype_out,
     275              :                                  tree *vecitype_out = NULL,
     276              :                                  enum optab_subtype subtype = optab_default)
     277              : {
     278         2698 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     279         2698 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     280         2698 :   if (!vecitype || !vecotype)
     281              :     return false;
     282              : 
     283         2469 :   if (!directly_supported_p (code, vecotype, vecitype, subtype))
     284              :     return false;
     285              : 
     286          464 :   *vecotype_out = vecotype;
     287          464 :   if (vecitype_out)
     288          464 :     *vecitype_out = vecitype;
     289              :   return true;
     290              : }
     291              : 
     292              : /* Round bit precision PRECISION up to a full element.  */
     293              : 
     294              : static unsigned int
     295      3023215 : vect_element_precision (unsigned int precision)
     296              : {
     297            0 :   precision = 1 << ceil_log2 (precision);
     298      4488272 :   return MAX (precision, BITS_PER_UNIT);
     299              : }
     300              : 
     301              : /* If OP is defined by a statement that's being considered for vectorization,
     302              :    return information about that statement, otherwise return NULL.  */
     303              : 
     304              : static stmt_vec_info
     305       320123 : vect_get_internal_def (vec_info *vinfo, tree op)
     306              : {
     307       320123 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     308       320123 :   if (def_stmt_info
     309       306610 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     310       288666 :     return vect_stmt_to_vectorize (def_stmt_info);
     311              :   return NULL;
     312              : }
     313              : 
     314              : /* Holds information about an input operand after some sign changes
     315              :    and type promotions have been peeled away.  */
     316              : class vect_unpromoted_value {
     317              : public:
     318              :   vect_unpromoted_value ();
     319              : 
     320              :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
     321              : 
     322              :   /* The value obtained after peeling away zero or more casts.  */
     323              :   tree op;
     324              : 
     325              :   /* The type of OP.  */
     326              :   tree type;
     327              : 
     328              :   /* The definition type of OP.  */
     329              :   vect_def_type dt;
     330              : 
     331              :   /* If OP is the result of peeling at least one cast, and if the cast
     332              :      of OP itself is a vectorizable statement, CASTER identifies that
     333              :      statement, otherwise it is null.  */
     334              :   stmt_vec_info caster;
     335              : };
     336              : 
     337    286600208 : inline vect_unpromoted_value::vect_unpromoted_value ()
     338    286600208 :   : op (NULL_TREE),
     339    286600208 :     type (NULL_TREE),
     340    286600208 :     dt (vect_uninitialized_def),
     341      2918978 :     caster (NULL)
     342              : {
     343              : }
     344              : 
     345              : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     346              :    statement that casts it to CASTER_IN.  */
     347              : 
     348              : inline void
     349     10631382 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     350              :                                stmt_vec_info caster_in)
     351              : {
     352     10631382 :   op = op_in;
     353     10631382 :   type = TREE_TYPE (op);
     354     10631382 :   dt = dt_in;
     355     10631382 :   caster = caster_in;
     356     10631382 : }
     357              : 
     358              : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     359              :    to reach some vectorizable inner operand OP', continuing as long as it
     360              :    is possible to convert OP' back to OP using a possible sign change
     361              :    followed by a possible promotion P.  Return this OP', or null if OP is
     362              :    not a vectorizable SSA name.  If there is a promotion P, describe its
     363              :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     364              :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     365              :    have more than one user.
     366              : 
     367              :    A successful return means that it is possible to go from OP' to OP
     368              :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     369              :    whereas the cast from UNPROM to OP might be a promotion, a sign
     370              :    change, or a nop.
     371              : 
     372              :    E.g. say we have:
     373              : 
     374              :        signed short *ptr = ...;
     375              :        signed short C = *ptr;
     376              :        unsigned short B = (unsigned short) C;    // sign change
     377              :        signed int A = (signed int) B;            // unsigned promotion
     378              :        ...possible other uses of A...
     379              :        unsigned int OP = (unsigned int) A;       // sign change
     380              : 
     381              :    In this case it's possible to go directly from C to OP using:
     382              : 
     383              :        OP = (unsigned int) (unsigned short) C;
     384              :             +------------+ +--------------+
     385              :                promotion      sign change
     386              : 
     387              :    so OP' would be C.  The input to the promotion is B, so UNPROM
     388              :    would describe B.  */
     389              : 
     390              : static tree
     391      7876024 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     392              :                                       vect_unpromoted_value *unprom,
     393              :                                       bool *single_use_p = NULL)
     394              : {
     395      7876024 :   tree op_type = TREE_TYPE (op);
     396      7876024 :   if (!INTEGRAL_TYPE_P (op_type))
     397              :     return NULL_TREE;
     398              : 
     399      7844629 :   tree res = NULL_TREE;
     400      7844629 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     401      7844629 :   unsigned int min_precision = orig_precision;
     402      7844629 :   stmt_vec_info caster = NULL;
     403      9394474 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     404              :     {
     405              :       /* See whether OP is simple enough to vectorize.  */
     406      9186481 :       stmt_vec_info def_stmt_info;
     407      9186481 :       gimple *def_stmt;
     408      9186481 :       vect_def_type dt;
     409      9186481 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     410              :         break;
     411              : 
     412              :       /* If OP is the input of a demotion, skip over it to see whether
     413              :          OP is itself the result of a promotion.  If so, the combined
     414              :          effect of the promotion and the demotion might fit the required
     415              :          pattern, otherwise neither operation fits.
     416              : 
     417              :          This copes with cases such as the result of an arithmetic
     418              :          operation being truncated before being stored, and where that
     419              :          arithmetic operation has been recognized as an over-widened one.  */
     420      9169725 :       if (TYPE_PRECISION (op_type) <= min_precision)
     421              :         {
     422              :           /* Use OP as the UNPROM described above if we haven't yet
     423              :              found a promotion, or if using the new input preserves the
     424              :              sign of the previous promotion.  */
     425      9051389 :           if (!res
     426      1319594 :               || TYPE_PRECISION (unprom->type) == orig_precision
     427        34441 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
     428      9083196 :               || (TYPE_UNSIGNED (op_type)
     429        22153 :                   && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
     430              :             {
     431      9020015 :               unprom->set_op (op, dt, caster);
     432      9020015 :               min_precision = TYPE_PRECISION (op_type);
     433              :             }
     434              :           /* Stop if we've already seen a promotion and if this
     435              :              conversion does more than change the sign.  */
     436        31374 :           else if (TYPE_PRECISION (op_type)
     437        31374 :                    != TYPE_PRECISION (unprom->type))
     438              :             break;
     439              : 
     440              :           /* The sequence now extends to OP.  */
     441              :           res = op;
     442              :         }
     443              : 
     444              :       /* See whether OP is defined by a cast.  Record it as CASTER if
     445              :          the cast is potentially vectorizable.  */
     446      9169684 :       if (!def_stmt)
     447              :         break;
     448      8965576 :       caster = def_stmt_info;
     449              : 
     450              :       /* Ignore pattern statements, since we don't link uses for them.  */
     451      8965576 :       if (caster
     452      8965576 :           && single_use_p
     453      1773590 :           && !STMT_VINFO_RELATED_STMT (caster)
     454     10603716 :           && !has_single_use (res))
     455       992530 :         *single_use_p = false;
     456              : 
     457     16602212 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     458      5689623 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     459              :         break;
     460              : 
     461              :       /* Continue with the input to the cast.  */
     462      1549845 :       op = gimple_assign_rhs1 (def_stmt);
     463      1549845 :       op_type = TREE_TYPE (op);
     464              :     }
     465              :   return res;
     466              : }
     467              : 
     468              : /* OP is an integer operand to an operation that returns TYPE, and we
     469              :    want to treat the operation as a widening one.  So far we can treat
     470              :    it as widening from *COMMON_TYPE.
     471              : 
     472              :    Return true if OP is suitable for such a widening operation,
     473              :    either widening from *COMMON_TYPE or from some supertype of it.
     474              :    Update *COMMON_TYPE to the supertype in the latter case.
     475              : 
     476              :    SHIFT_P is true if OP is a shift amount.  */
     477              : 
     478              : static bool
     479       278486 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     480              :                             tree *common_type)
     481              : {
     482              :   /* Calculate the minimum precision required by OP, without changing
     483              :      the sign of either operand.  */
     484       278486 :   unsigned int precision;
     485       278486 :   if (shift_p)
     486              :     {
     487        12843 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     488              :         return false;
     489        10306 :       precision = TREE_INT_CST_LOW (op);
     490              :     }
     491              :   else
     492              :     {
     493       265643 :       precision = wi::min_precision (wi::to_widest (op),
     494       265643 :                                      TYPE_SIGN (*common_type));
     495       265643 :       if (precision * 2 > TYPE_PRECISION (type))
     496              :         return false;
     497              :     }
     498              : 
     499              :   /* If OP requires a wider type, switch to that type.  The checks
     500              :      above ensure that this is still narrower than the result.  */
     501       262656 :   precision = vect_element_precision (precision);
     502       262656 :   if (TYPE_PRECISION (*common_type) < precision)
     503         6678 :     *common_type = build_nonstandard_integer_type
     504         6678 :       (precision, TYPE_UNSIGNED (*common_type));
     505              :   return true;
     506              : }
     507              : 
     508              : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     509              :    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
     510              : 
     511              : static bool
     512        41519 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     513              : {
     514        41519 :   if (types_compatible_p (*common_type, new_type))
     515              :     return true;
     516              : 
     517              :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     518         7408 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     519         7408 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     520              :     return true;
     521              : 
     522              :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     523         6799 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     524         6799 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     525              :     {
     526          350 :       *common_type = new_type;
     527          350 :       return true;
     528              :     }
     529              : 
     530              :   /* We have mismatched signs, with the signed type being
     531              :      no wider than the unsigned type.  In this case we need
     532              :      a wider signed type.  */
     533         6449 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     534              :                                 TYPE_PRECISION (new_type));
     535         6449 :   precision *= 2;
     536              : 
     537         6449 :   if (precision * 2 > TYPE_PRECISION (type))
     538              :     return false;
     539              : 
     540           43 :   *common_type = build_nonstandard_integer_type (precision, false);
     541           43 :   return true;
     542              : }
     543              : 
     544              : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     545              :    in which each node either performs CODE or WIDENED_CODE, and where
     546              :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     547              :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     548              :    CODE and WIDENED_CODE are some sort of shift.
     549              : 
     550              :    If STMT_INFO is such a tree, return the number of leaf operands
     551              :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     552              :    to a type that (a) is narrower than the result of STMT_INFO and
     553              :    (b) can hold all leaf operand values.
     554              : 
     555              :    If SUBTYPE then allow that the signs of the operands
     556              :    may differ in signs but not in precision.  SUBTYPE is updated to reflect
     557              :    this.
     558              : 
     559              :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     560              :    exists.  */
     561              : 
     562              : static unsigned int
     563    121131353 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     564              :                       code_helper widened_code, bool shift_p,
     565              :                       unsigned int max_nops,
     566              :                       vect_unpromoted_value *unprom, tree *common_type,
     567              :                       enum optab_subtype *subtype = NULL)
     568              : {
     569              :   /* Check for an integer operation with the right code.  */
     570    121131353 :   gimple* stmt = stmt_info->stmt;
     571    121131353 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     572              :     return 0;
     573              : 
     574     97218163 :   code_helper rhs_code;
     575     97218163 :   if (is_gimple_assign (stmt))
     576     82876499 :     rhs_code = gimple_assign_rhs_code (stmt);
     577     14341664 :   else if (is_gimple_call (stmt))
     578     14341664 :     rhs_code = gimple_call_combined_fn (stmt);
     579              :   else
     580              :     return 0;
     581              : 
     582     97218163 :   if (rhs_code != code
     583     97218163 :       && rhs_code != widened_code)
     584              :     return 0;
     585              : 
     586      6003387 :   tree lhs = gimple_get_lhs (stmt);
     587      6003387 :   tree type = TREE_TYPE (lhs);
     588      6003387 :   if (!INTEGRAL_TYPE_P (type))
     589              :     return 0;
     590              : 
     591              :   /* Assume that both operands will be leaf operands.  */
     592      5458159 :   max_nops -= 2;
     593              : 
     594              :   /* Check the operands.  */
     595      5458159 :   unsigned int next_op = 0;
     596      6154633 :   for (unsigned int i = 0; i < 2; ++i)
     597              :     {
     598      5856524 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     599      5856524 :       unsigned int nops = 1;
     600      5856524 :       tree op = gimple_arg (stmt, i);
     601      5856524 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     602              :         {
     603              :           /* We already have a common type from earlier operands.
     604              :              Update it to account for OP.  */
     605       278486 :           this_unprom->set_op (op, vect_constant_def);
     606       278486 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     607              :             return 0;
     608              :         }
     609              :       else
     610              :         {
     611              :           /* Only allow shifts by constants.  */
     612      5578038 :           if (shift_p && i == 1)
     613              :             return 0;
     614              : 
     615      5572145 :           if (rhs_code != code)
     616              :             {
     617              :               /* If rhs_code is widened_code, don't look through further
     618              :                  possible promotions, there is a promotion already embedded
     619              :                  in the WIDEN_*_EXPR.  */
     620         1396 :               if (TREE_CODE (op) != SSA_NAME
     621         1396 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     622            0 :                 return 0;
     623              : 
     624         1396 :               stmt_vec_info def_stmt_info;
     625         1396 :               gimple *def_stmt;
     626         1396 :               vect_def_type dt;
     627         1396 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     628              :                                        &def_stmt))
     629              :                 return 0;
     630         1396 :               this_unprom->set_op (op, dt, NULL);
     631              :             }
     632      5570749 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     633              :                                                           this_unprom))
     634              :             return 0;
     635              : 
     636      5463439 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     637              :             {
     638              :               /* The operand isn't widened.  If STMT_INFO has the code
     639              :                  for an unwidened operation, recursively check whether
     640              :                  this operand is a node of the tree.  */
     641      5019330 :               if (rhs_code != code
     642      5019330 :                   || max_nops == 0
     643      5019739 :                   || this_unprom->dt != vect_internal_def)
     644              :                 return 0;
     645              : 
     646              :               /* Give back the leaf slot allocated above now that we're
     647              :                  not treating this as a leaf operand.  */
     648          409 :               max_nops += 1;
     649              : 
     650              :               /* Recursively process the definition of the operand.  */
     651          409 :               stmt_vec_info def_stmt_info
     652          409 :                 = vect_get_internal_def (vinfo, this_unprom->op);
     653              : 
     654          409 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     655              :                                            widened_code, shift_p, max_nops,
     656              :                                            this_unprom, common_type,
     657              :                                            subtype);
     658          409 :               if (nops == 0)
     659              :                 return 0;
     660              : 
     661          273 :               max_nops -= nops;
     662              :             }
     663              :           else
     664              :             {
     665              :               /* Make sure that the operand is narrower than the result.  */
     666       444109 :               if (TYPE_PRECISION (this_unprom->type) * 2
     667       444109 :                   > TYPE_PRECISION (type))
     668              :                 return 0;
     669              : 
     670              :               /* Update COMMON_TYPE for the new operand.  */
     671       439749 :               if (i == 0)
     672       398230 :                 *common_type = this_unprom->type;
     673        41519 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     674              :                                                  common_type))
     675              :                 {
     676         6406 :                   if (subtype)
     677              :                     {
     678              :                       /* See if we can sign extend the smaller type.  */
     679          202 :                       if (TYPE_PRECISION (this_unprom->type)
     680          202 :                           > TYPE_PRECISION (*common_type))
     681           27 :                         *common_type = this_unprom->type;
     682          202 :                       *subtype = optab_vector_mixed_sign;
     683              :                     }
     684              :                   else
     685              :                     return 0;
     686              :                 }
     687              :             }
     688              :         }
     689       696474 :       next_op += nops;
     690              :     }
     691              :   return next_op;
     692              : }
     693              : 
     694              : /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
     695              :    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
     696              : 
     697              : static tree
     698      1951213 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     699              : {
     700            0 :   return make_temp_ssa_name (type, stmt, "patt");
     701              : }
     702              : 
     703              : /* STMT2_INFO describes a type conversion that could be split into STMT1
     704              :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     705              :    input.  Try to do this using pattern statements, returning true on
     706              :    success.  */
     707              : 
     708              : static bool
     709        29428 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     710              :                       gimple *stmt1, tree vectype)
     711              : {
     712        29428 :   if (is_pattern_stmt_p (stmt2_info))
     713              :     {
     714              :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     715              :          the pattern is attached.  */
     716          436 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     717          436 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     718              : 
     719          436 :       if (dump_enabled_p ())
     720           19 :         dump_printf_loc (MSG_NOTE, vect_location,
     721              :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     722              : 
     723              :       /* Since STMT2_INFO is a pattern statement, we can change it
     724              :          in-situ without worrying about changing the code for the
     725              :          containing block.  */
     726          436 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     727              : 
     728          436 :       if (dump_enabled_p ())
     729              :         {
     730           19 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     731           19 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     732              :                            stmt2_info->stmt);
     733              :         }
     734              : 
     735          436 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     736          436 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     737              :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     738              :            to the end of the definition sequence.  */
     739          433 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     740              :       else
     741              :         {
     742              :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     743              :              before it.  */
     744            3 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     745            3 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     746              :         }
     747          436 :       return true;
     748              :     }
     749              :   else
     750              :     {
     751              :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     752              :          two-statement pattern now.  */
     753        28992 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     754        28992 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     755        28992 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     756        28992 :       if (!lhs_vectype)
     757              :         return false;
     758              : 
     759        28992 :       if (dump_enabled_p ())
     760         1890 :         dump_printf_loc (MSG_NOTE, vect_location,
     761              :                          "Splitting statement: %G", stmt2_info->stmt);
     762              : 
     763              :       /* Add STMT1 as a singleton pattern definition sequence.  */
     764        28992 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     765        28992 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     766        28992 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     767              : 
     768              :       /* Build the second of the two pattern statements.  */
     769        28992 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     770        28992 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     771        28992 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     772              : 
     773        28992 :       if (dump_enabled_p ())
     774              :         {
     775         1890 :           dump_printf_loc (MSG_NOTE, vect_location,
     776              :                            "into pattern statements: %G", stmt1);
     777         1890 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     778              :                            (gimple *) new_stmt2);
     779              :         }
     780              : 
     781        28992 :       return true;
     782              :     }
     783              : }
     784              : 
     785              : /* Look for the following pattern
     786              :         X = x[i]
     787              :         Y = y[i]
     788              :         DIFF = X - Y
     789              :         DAD = ABS_EXPR<DIFF>
     790              : 
     791              :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     792              :    HALF_TYPE and UNPROM will be set should the statement be found to
     793              :    be a widened operation.
     794              :    DIFF_STMT will be set to the MINUS_EXPR
     795              :    statement that precedes the ABS_STMT if it is a MINUS_EXPR..
     796              :  */
     797              : static bool
     798     20353353 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     799              :                                 tree *half_type,
     800              :                                 vect_unpromoted_value unprom[2],
     801              :                                 gassign **diff_stmt)
     802              : {
     803     20353353 :   if (!abs_stmt)
     804              :     return false;
     805              : 
     806              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     807              :      inside the loop (in case we are analyzing an outer-loop).  */
     808     20353353 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     809     20353353 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     810              :     return false;
     811              : 
     812        21873 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     813        21873 :   tree abs_type = TREE_TYPE (abs_oprnd);
     814        21873 :   if (!abs_oprnd)
     815              :     return false;
     816        17016 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     817         5153 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     818        26880 :       || TYPE_UNSIGNED (abs_type))
     819              :     return false;
     820              : 
     821              :   /* Peel off conversions from the ABS input.  This can involve sign
     822              :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     823              :      or signed promotion, but it can't include unsigned promotion.
     824              :      (Note that ABS of an unsigned promotion should have been folded
     825              :      away before now anyway.)  */
     826         5007 :   vect_unpromoted_value unprom_diff;
     827         5007 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     828              :                                                     &unprom_diff);
     829         5007 :   if (!abs_oprnd)
     830              :     return false;
     831         4711 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     832         4711 :       && TYPE_UNSIGNED (unprom_diff.type))
     833              :     return false;
     834              : 
     835              :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     836         4711 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     837         4711 :   if (!diff_stmt_vinfo)
     838              :     return false;
     839              : 
     840         4541 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     841         4541 :   if (diff_stmt && diff
     842         3490 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     843         6257 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     844          272 :     *diff_stmt = diff;
     845              : 
     846              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     847              :      inside the loop (in case we are analyzing an outer-loop).  */
     848         4541 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     849         4541 :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     850              :                             false, 2, unprom, half_type))
     851              :     return true;
     852              : 
     853              :   return false;
     854              : }
     855              : 
     856              : /* Convert UNPROM to TYPE and return the result, adding new statements
     857              :    to STMT_INFO's pattern definition statements if no better way is
     858              :    available.  VECTYPE is the vector form of TYPE.
     859              : 
     860              :    If SUBTYPE then convert the type based on the subtype.  */
     861              : 
     862              : static tree
     863       447260 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     864              :                     vect_unpromoted_value *unprom, tree vectype,
     865              :                     enum optab_subtype subtype = optab_default)
     866              : {
     867              :   /* Update the type if the signs differ.  */
     868       447260 :   if (subtype == optab_vector_mixed_sign)
     869              :     {
     870          190 :       gcc_assert (!TYPE_UNSIGNED (type));
     871          190 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     872              :         {
     873           95 :           type = unsigned_type_for (type);
     874           95 :           vectype = unsigned_type_for (vectype);
     875              :         }
     876              :     }
     877              : 
     878              :   /* Check for a no-op conversion.  */
     879       447260 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     880       151694 :     return unprom->op;
     881              : 
     882              :   /* Allow the caller to create constant vect_unpromoted_values.  */
     883       295566 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     884       183775 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     885              : 
     886       111791 :   tree input = unprom->op;
     887       111791 :   if (unprom->caster)
     888              :     {
     889        59361 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     890        59361 :       tree lhs_type = TREE_TYPE (lhs);
     891              : 
     892              :       /* If the result of the existing cast is the right width, use it
     893              :          instead of the source of the cast.  */
     894        59361 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     895              :         input = lhs;
     896              :       /* If the precision we want is between the source and result
     897              :          precisions of the existing cast, try splitting the cast into
     898              :          two and tapping into a mid-way point.  */
     899        57321 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     900        57321 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     901              :         {
     902              :           /* In order to preserve the semantics of the original cast,
     903              :              give the mid-way point the same signedness as the input value.
     904              : 
     905              :              It would be possible to use a signed type here instead if
     906              :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     907              :              make the sign of the midtype sensitive to the order in
     908              :              which we process the statements, since the signedness of
     909              :              TYPE is the signedness required by just one of possibly
     910              :              many users.  Also, unsigned promotions are usually as cheap
     911              :              as or cheaper than signed ones, so it's better to keep an
     912              :              unsigned promotion.  */
     913        29428 :           tree midtype = build_nonstandard_integer_type
     914        29428 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     915        29428 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     916        29428 :           if (vec_midtype)
     917              :             {
     918        29428 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     919        29428 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     920              :                                                        unprom->op);
     921        29428 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     922              :                                          vec_midtype))
     923            0 :                 append_pattern_def_seq (vinfo, stmt_info,
     924              :                                         new_stmt, vec_midtype);
     925              :             }
     926              :         }
     927              : 
     928              :       /* See if we can reuse an existing result.  */
     929        59361 :       if (types_compatible_p (type, TREE_TYPE (input)))
     930              :         return input;
     931              :     }
     932              : 
     933              :   /* We need a new conversion statement.  */
     934        90310 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     935        90310 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     936              : 
     937              :   /* If OP is an external value, see if we can insert the new statement
     938              :      on an incoming edge.  */
     939        90310 :   if (input == unprom->op && unprom->dt == vect_external_def)
     940         8427 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     941              :       {
     942          716 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     943          716 :         gcc_assert (!new_bb);
     944              :         return new_op;
     945              :       }
     946              : 
     947              :   /* As a (common) last resort, add the statement to the pattern itself.  */
     948        89594 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     949        89594 :   return new_op;
     950              : }
     951              : 
     952              : /* Invoke vect_convert_input for N elements of UNPROM and store the
     953              :    result in the corresponding elements of RESULT.
     954              : 
     955              :    If SUBTYPE then convert the type based on the subtype.  */
     956              : 
     957              : static void
     958       227221 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     959              :                      tree *result, tree type, vect_unpromoted_value *unprom,
     960              :                      tree vectype, enum optab_subtype subtype = optab_default)
     961              : {
     962       674344 :   for (unsigned int i = 0; i < n; ++i)
     963              :     {
     964              :       unsigned int j;
     965       666692 :       for (j = 0; j < i; ++j)
     966       219902 :         if (unprom[j].op == unprom[i].op)
     967              :           break;
     968              : 
     969       447123 :       if (j < i)
     970          333 :         result[i] = result[j];
     971              :       else
     972       446790 :         result[i] = vect_convert_input (vinfo, stmt_info,
     973       446790 :                                         type, &unprom[i], vectype, subtype);
     974              :     }
     975       227221 : }
     976              : 
     977              : /* The caller has created a (possibly empty) sequence of pattern definition
     978              :    statements followed by a single statement PATTERN_STMT.  Cast the result
     979              :    of this final statement to TYPE.  If a new statement is needed, add
     980              :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     981              :    and return the new statement, otherwise return PATTERN_STMT as-is.
     982              :    VECITYPE is the vector form of PATTERN_STMT's result type.  */
     983              : 
     984              : static gimple *
     985       252049 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     986              :                      gimple *pattern_stmt, tree vecitype)
     987              : {
     988       252049 :   tree lhs = gimple_get_lhs (pattern_stmt);
     989       252049 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
     990              :     {
     991       224829 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
     992       224829 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     993       224829 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
     994              :     }
     995       252049 :   return pattern_stmt;
     996              : }
     997              : 
     998              : /* Return true if STMT_VINFO describes a reduction for which reassociation
     999              :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1000              :    a reduction chain and optimistically assume that all statements
    1001              :    except the last allow reassociation.
    1002              :    Also require it to have code CODE and to be a reduction
    1003              :    in the outermost loop.  When returning true, store the operands in
    1004              :    *OP0_OUT and *OP1_OUT.  */
    1005              : 
    1006              : static bool
    1007     90362266 : vect_reassociating_reduction_p (vec_info *vinfo,
    1008              :                                 stmt_vec_info stmt_info, tree_code code,
    1009              :                                 tree *op0_out, tree *op1_out)
    1010              : {
    1011     90362266 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1012     11337874 :   if (!loop_info)
    1013              :     return false;
    1014              : 
    1015     11337874 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1016     12277369 :   if (!assign || gimple_assign_rhs_code (assign) != code)
    1017              :     return false;
    1018              : 
    1019              :   /* We don't allow changing the order of the computation in the inner-loop
    1020              :      when doing outer-loop vectorization.  */
    1021      2175573 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1022     92430808 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1023              :     return false;
    1024              : 
    1025      2123838 :   if (!vect_is_reduction (stmt_info))
    1026              :     return false;
    1027              : 
    1028       117108 :   if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1029       117108 :                                    code))
    1030              :     return false;
    1031              : 
    1032       107031 :   *op0_out = gimple_assign_rhs1 (assign);
    1033       107031 :   *op1_out = gimple_assign_rhs2 (assign);
    1034       107031 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1035        41887 :     std::swap (*op0_out, *op1_out);
    1036              :   return true;
    1037              : }
    1038              : 
    1039              : /* Return true iff the target has a vector optab implementing the operation
    1040              :    CODE on type VECTYPE with SUBTYPE.  */
    1041              : 
    1042              : static bool
    1043       792926 : target_has_vecop_for_code (tree_code code, tree vectype,
    1044              :                            enum optab_subtype subtype = optab_vector)
    1045              : {
    1046       792926 :   optab voptab = optab_for_tree_code (code, vectype, subtype);
    1047       792926 :   return voptab && can_implement_p (voptab, TYPE_MODE (vectype));
    1048              : }
    1049              : 
    1050              : /* match.pd function to match
    1051              :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1052              :    with conditions:
    1053              :    1) @1, @2, c, d, a, b are all integral type.
    1054              :    2) There's single_use for both @1 and @2.
    1055              :    3) a, c have same precision.
    1056              :    4) c and @1 have different precision.
    1057              :    5) c, d are the same type or they can differ in sign when convert is
    1058              :    truncation.
    1059              : 
    1060              :    record a and c and d and @3.  */
    1061              : 
    1062              : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1063              : 
    1064              : /* Function vect_recog_cond_expr_convert
    1065              : 
    1066              :    Try to find the following pattern:
    1067              : 
    1068              :    TYPE_AB A,B;
    1069              :    TYPE_CD C,D;
    1070              :    TYPE_E E;
    1071              :    TYPE_E op_true = (TYPE_E) A;
    1072              :    TYPE_E op_false = (TYPE_E) B;
    1073              : 
    1074              :    E = C cmp D ? op_true : op_false;
    1075              : 
    1076              :    where
    1077              :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1078              :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1079              :    single_use of op_true and op_false.
    1080              :    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
    1081              : 
    1082              :    Input:
    1083              : 
    1084              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1085              :    here it starts with E = c cmp D ? op_true : op_false;
    1086              : 
    1087              :    Output:
    1088              : 
    1089              :    TYPE1 E' = C cmp D ? A : B;
    1090              :    TYPE3 E = (TYPE3) E';
    1091              : 
    1092              :    There may extra nop_convert for A or B to handle different signness.
    1093              : 
    1094              :    * TYPE_OUT: The vector type of the output of this pattern.
    1095              : 
    1096              :    * Return value: A new stmt that will be used to replace the sequence of
    1097              :    stmts that constitute the pattern. In this case it will be:
    1098              :    E = (TYPE3)E';
    1099              :    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    1100              : 
    1101              : static gimple *
    1102     30190557 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1103              :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1104              : {
    1105     30190557 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1106     20445752 :   tree lhs, match[4], temp, type, new_lhs, op2, op1;
    1107     20445752 :   gimple *cond_stmt;
    1108     20445752 :   gimple *pattern_stmt;
    1109     30190526 :   enum tree_code code = NOP_EXPR;
    1110              : 
    1111     20445752 :   if (!last_stmt)
    1112              :     return NULL;
    1113              : 
    1114     20445752 :   lhs = gimple_assign_lhs (last_stmt);
    1115              : 
    1116              :   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
    1117              :      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
    1118     20445752 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1119              :     return NULL;
    1120              : 
    1121           31 :   if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    1122            8 :     code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
    1123           23 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
    1124            0 :     code = FIX_TRUNC_EXPR;
    1125              : 
    1126           31 :   op1 = match[1];
    1127           31 :   op2 = match[2];
    1128           31 :   type = TREE_TYPE (op1);
    1129              :   /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
    1130              :      SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
    1131              :      Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
    1132              :      or CONVERT_EXPR.  */
    1133           31 :   if (TREE_CODE (op1) == REAL_CST)
    1134              :     {
    1135            8 :       op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
    1136            8 :       type = TREE_TYPE (op2);
    1137            8 :       if (op1 == NULL_TREE)
    1138              :         return NULL;
    1139              :     }
    1140           23 :   else if (TREE_CODE (op2) == REAL_CST)
    1141              :     {
    1142            0 :       op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
    1143            0 :       if (op2 == NULL_TREE)
    1144              :         return NULL;
    1145              :     }
    1146           23 :   else if (code == NOP_EXPR)
    1147              :     {
    1148           23 :       if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    1149              :         {
    1150           23 :           op2 = vect_recog_temp_ssa_var (type, NULL);
    1151           23 :           gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1152           23 :           append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt);
    1153              :         }
    1154              :     }
    1155              : 
    1156           31 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1157              : 
    1158           31 :   temp = vect_recog_temp_ssa_var (type, NULL);
    1159           31 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1160              :                                                  op1, op2));
    1161           31 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt);
    1162           31 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1163           31 :   pattern_stmt = gimple_build_assign (new_lhs, code, temp);
    1164           31 :   *type_out = NULL_TREE;
    1165              : 
    1166           31 :   if (dump_enabled_p ())
    1167            8 :     dump_printf_loc (MSG_NOTE, vect_location,
    1168              :                      "created pattern stmt: %G", pattern_stmt);
    1169              :   return pattern_stmt;
    1170              : }
    1171              : 
    1172              : /* Function vect_recog_dot_prod_pattern
    1173              : 
    1174              :    Try to find the following pattern:
    1175              : 
    1176              :      type1a x_t
    1177              :      type1b y_t;
    1178              :      TYPE1 prod;
    1179              :      TYPE2 sum = init;
    1180              :    loop:
    1181              :      sum_0 = phi <init, sum_1>
    1182              :      S1  x_t = ...
    1183              :      S2  y_t = ...
    1184              :      S3  x_T = (TYPE1) x_t;
    1185              :      S4  y_T = (TYPE1) y_t;
    1186              :      S5  prod = x_T * y_T;
    1187              :      [S6  prod = (TYPE2) prod;  #optional]
    1188              :      S7  sum_1 = prod + sum_0;
    1189              : 
    1190              :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1191              :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1192              :    'type1a' and 'type1b' can differ.
    1193              : 
    1194              :    Input:
    1195              : 
    1196              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1197              :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1198              :    will be detected.
    1199              : 
    1200              :    Output:
    1201              : 
    1202              :    * TYPE_OUT: The type of the output  of this pattern.
    1203              : 
    1204              :    * Return value: A new stmt that will be used to replace the sequence of
    1205              :    stmts that constitute the pattern. In this case it will be:
    1206              :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1207              : 
    1208              :    Note: The dot-prod idiom is a widening reduction pattern that is
    1209              :          vectorized without preserving all the intermediate results. It
    1210              :          produces only N/2 (widened) results (by summing up pairs of
    1211              :          intermediate results) rather than all N results.  Therefore, we
    1212              :          cannot allow this pattern when we want to get all the results and in
    1213              :          the correct order (as is the case when this computation is in an
    1214              :          inner-loop nested in an outer-loop that us being vectorized).  */
    1215              : 
    1216              : static gimple *
    1217     30121150 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1218              :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1219              : {
    1220     30121150 :   tree oprnd0, oprnd1;
    1221     30121150 :   gimple *last_stmt = stmt_vinfo->stmt;
    1222     30121150 :   tree type, half_type;
    1223     30121150 :   gimple *pattern_stmt;
    1224     30121150 :   tree var;
    1225              : 
    1226              :   /* Look for the following pattern
    1227              :           DX = (TYPE1) X;
    1228              :           DY = (TYPE1) Y;
    1229              :           DPROD = DX * DY;
    1230              :           DDPROD = (TYPE2) DPROD;
    1231              :           sum_1 = DDPROD + sum_0;
    1232              :      In which
    1233              :      - DX is double the size of X
    1234              :      - DY is double the size of Y
    1235              :      - DX, DY, DPROD all have the same type but the sign
    1236              :        between X, Y and DPROD can differ.
    1237              :      - sum is the same size of DPROD or bigger
    1238              :      - sum has been recognized as a reduction variable.
    1239              : 
    1240              :      This is equivalent to:
    1241              :        DPROD = X w* Y;          #widen mult
    1242              :        sum_1 = DPROD w+ sum_0;  #widen summation
    1243              :      or
    1244              :        DPROD = X w* Y;          #widen mult
    1245              :        sum_1 = DPROD + sum_0;   #summation
    1246              :    */
    1247              : 
    1248              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1249              :      of the above pattern.  */
    1250              : 
    1251     30121150 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1252              :                                        &oprnd0, &oprnd1))
    1253              :     return NULL;
    1254              : 
    1255        36075 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1256              : 
    1257        36075 :   vect_unpromoted_value unprom_mult;
    1258        36075 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1259              : 
    1260              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1261              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1262              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1263              :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1264        36075 :   if (!oprnd0)
    1265              :     return NULL;
    1266              : 
    1267        25446 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1268        25446 :   if (!mult_vinfo)
    1269              :     return NULL;
    1270              : 
    1271              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1272              :      inside the loop (in case we are analyzing an outer-loop).  */
    1273        74196 :   vect_unpromoted_value unprom0[2];
    1274        24732 :   enum optab_subtype subtype = optab_vector;
    1275        24732 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1276              :                              false, 2, unprom0, &half_type, &subtype))
    1277              :     return NULL;
    1278              : 
    1279              :   /* If there are two widening operations, make sure they agree on the sign
    1280              :      of the extension.  The result of an optab_vector_mixed_sign operation
    1281              :      is signed; otherwise, the result has the same sign as the operands.  */
    1282          972 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1283         1523 :       && (subtype == optab_vector_mixed_sign
    1284          551 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1285          387 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1286              :     return NULL;
    1287              : 
    1288          891 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1289              : 
    1290              :   /* If the inputs have mixed signs, canonicalize on using the signed
    1291              :      input type for analysis.  This also helps when emulating mixed-sign
    1292              :      operations using signed operations.  */
    1293          891 :   if (subtype == optab_vector_mixed_sign)
    1294          157 :     half_type = signed_type_for (half_type);
    1295              : 
    1296          891 :   tree half_vectype;
    1297          891 :   if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1298              :                                         type_out, &half_vectype, subtype))
    1299              :     {
    1300              :       /* We can emulate a mixed-sign dot-product using a sequence of
    1301              :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1302          436 :       if (subtype != optab_vector_mixed_sign
    1303          436 :           || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
    1304              :                                                DOT_PROD_EXPR, half_type,
    1305              :                                                type_out, &half_vectype,
    1306              :                                                optab_vector))
    1307          427 :         return NULL;
    1308              : 
    1309            9 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1310              :                                                *type_out);
    1311              :     }
    1312              : 
    1313              :   /* Get the inputs in the appropriate types.  */
    1314          464 :   tree mult_oprnd[2];
    1315          464 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1316              :                        unprom0, half_vectype, subtype);
    1317              : 
    1318          464 :   var = vect_recog_temp_ssa_var (type, NULL);
    1319          464 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1320              :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1321              : 
    1322          464 :   return pattern_stmt;
    1323              : }
    1324              : 
    1325              : 
    1326              : /* Function vect_recog_sad_pattern
    1327              : 
    1328              :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1329              : 
    1330              :      type x_t, y_t;
    1331              :      signed TYPE1 diff, abs_diff;
    1332              :      TYPE2 sum = init;
    1333              :    loop:
    1334              :      sum_0 = phi <init, sum_1>
    1335              :      S1  x_t = ...
    1336              :      S2  y_t = ...
    1337              :      S3  x_T = (TYPE1) x_t;
    1338              :      S4  y_T = (TYPE1) y_t;
    1339              :      S5  diff = x_T - y_T;
    1340              :      S6  abs_diff = ABS_EXPR <diff>;
    1341              :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1342              :      S8  sum_1 = abs_diff + sum_0;
    1343              : 
    1344              :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1345              :    same size of 'TYPE1' or bigger. This is a special case of a reduction
    1346              :    computation.
    1347              : 
    1348              :    Input:
    1349              : 
    1350              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1351              :    example, when this function is called with S8, the pattern
    1352              :    {S3,S4,S5,S6,S7,S8} will be detected.
    1353              : 
    1354              :    Output:
    1355              : 
    1356              :    * TYPE_OUT: The type of the output of this pattern.
    1357              : 
    1358              :    * Return value: A new stmt that will be used to replace the sequence of
    1359              :    stmts that constitute the pattern. In this case it will be:
    1360              :         SAD_EXPR <x_t, y_t, sum_0>
    1361              :   */
    1362              : 
    1363              : static gimple *
    1364     30120691 : vect_recog_sad_pattern (vec_info *vinfo,
    1365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1366              : {
    1367     30120691 :   gimple *last_stmt = stmt_vinfo->stmt;
    1368     30120691 :   tree half_type;
    1369              : 
    1370              :   /* Look for the following pattern
    1371              :           DX = (TYPE1) X;
    1372              :           DY = (TYPE1) Y;
    1373              :           DDIFF = DX - DY;
    1374              :           DAD = ABS_EXPR <DDIFF>;
    1375              :           DDPROD = (TYPE2) DPROD;
    1376              :           sum_1 = DAD + sum_0;
    1377              :      In which
    1378              :      - DX is at least double the size of X
    1379              :      - DY is at least double the size of Y
    1380              :      - DX, DY, DDIFF, DAD all have the same type
    1381              :      - sum is the same size of DAD or bigger
    1382              :      - sum has been recognized as a reduction variable.
    1383              : 
    1384              :      This is equivalent to:
    1385              :        DDIFF = X w- Y;          #widen sub
    1386              :        DAD = ABS_EXPR <DDIFF>;
    1387              :        sum_1 = DAD w+ sum_0;    #widen summation
    1388              :      or
    1389              :        DDIFF = X w- Y;          #widen sub
    1390              :        DAD = ABS_EXPR <DDIFF>;
    1391              :        sum_1 = DAD + sum_0;     #summation
    1392              :    */
    1393              : 
    1394              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1395              :      of the above pattern.  */
    1396              : 
    1397     30120691 :   tree plus_oprnd0, plus_oprnd1;
    1398     30120691 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1399              :                                        &plus_oprnd0, &plus_oprnd1))
    1400              :     return NULL;
    1401              : 
    1402        35611 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1403              : 
    1404              :   /* Any non-truncating sequence of conversions is OK here, since
    1405              :      with a successful match, the result of the ABS(U) is known to fit
    1406              :      within the nonnegative range of the result type.  (It cannot be the
    1407              :      negative of the minimum signed value due to the range of the widening
    1408              :      MINUS_EXPR.)  */
    1409        35611 :   vect_unpromoted_value unprom_abs;
    1410        35611 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1411              :                                                       &unprom_abs);
    1412              : 
    1413              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1414              :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1415              :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1416              :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1417              : 
    1418        35611 :   if (!plus_oprnd0)
    1419              :     return NULL;
    1420              : 
    1421        24982 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1422        24982 :   if (!abs_stmt_vinfo)
    1423              :     return NULL;
    1424              : 
    1425              :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1426              :      inside the loop (in case we are analyzing an outer-loop).  */
    1427        24268 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1428        72804 :   vect_unpromoted_value unprom[2];
    1429              : 
    1430        24268 :   if (!abs_stmt)
    1431              :     {
    1432     30120701 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1433          276 :       if (!abd_stmt
    1434          276 :           || !gimple_call_internal_p (abd_stmt)
    1435            0 :           || gimple_call_num_args (abd_stmt) != 2)
    1436              :         return NULL;
    1437              : 
    1438            0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1439            0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1440              : 
    1441            0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
    1442            0 :           || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1443              :         {
    1444            0 :           unprom[0].op = abd_oprnd0;
    1445            0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1446            0 :           unprom[1].op = abd_oprnd1;
    1447            0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1448              :         }
    1449              :       else
    1450              :         return NULL;
    1451              : 
    1452            0 :       half_type = unprom[0].type;
    1453              :     }
    1454        23931 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1455              :                                             unprom, NULL))
    1456              :     return NULL;
    1457              : 
    1458          426 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1459              : 
    1460          426 :   tree half_vectype;
    1461          426 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1462              :                                         type_out, &half_vectype))
    1463              :     return NULL;
    1464              : 
    1465              :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1466          266 :   tree sad_oprnd[2];
    1467          266 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1468              :                        unprom, half_vectype);
    1469              : 
    1470          266 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1471          266 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1472              :                                               sad_oprnd[1], plus_oprnd1);
    1473              : 
    1474          266 :   return pattern_stmt;
    1475              : }
    1476              : 
    1477              : /* Function vect_recog_abd_pattern
    1478              : 
    1479              :    Try to find the following ABsolute Difference (ABD) or
    1480              :    widening ABD (WIDEN_ABD) pattern:
    1481              : 
    1482              :    TYPE1 x;
    1483              :    TYPE2 y;
    1484              :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1485              :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1486              :    TYPE3 diff = x_cast - y_cast;
    1487              :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1488              :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1489              : 
    1490              :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1491              :    twice as wide as TYPE3.
    1492              : 
    1493              :    Input:
    1494              : 
    1495              :    * STMT_VINFO: The stmt from which the pattern search begins
    1496              : 
    1497              :    Output:
    1498              : 
    1499              :    * TYPE_OUT: The type of the output of this pattern
    1500              : 
    1501              :    * Return value: A new stmt that will be used to replace the sequence of
    1502              :      stmts that constitute the pattern, principally:
    1503              :         out = IFN_ABD (x, y)
    1504              :         out = IFN_WIDEN_ABD (x, y)
    1505              :  */
    1506              : 
    1507              : static gimple *
    1508     30074104 : vect_recog_abd_pattern (vec_info *vinfo,
    1509              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1510              : {
    1511     50403526 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1512     20329422 :   if (!last_stmt)
    1513              :     return NULL;
    1514              : 
    1515     20329422 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1516              : 
    1517     60988266 :   vect_unpromoted_value unprom[2];
    1518     20329422 :   gassign *diff_stmt = NULL;
    1519     20329422 :   tree abd_in_type;
    1520     20329422 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
    1521              :                                        unprom, &diff_stmt))
    1522              :     {
    1523              :       /* We cannot try further without having a non-widening MINUS.  */
    1524     20328194 :       if (!diff_stmt)
    1525              :         return NULL;
    1526              : 
    1527          272 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1528          272 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1529          272 :       abd_in_type = signed_type_for (out_type);
    1530              :     }
    1531              : 
    1532         1500 :   tree abd_out_type = abd_in_type;
    1533              : 
    1534         1500 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1535         1500 :   if (!vectype_in)
    1536              :     return NULL;
    1537              : 
    1538         1484 :   internal_fn ifn = IFN_ABD;
    1539         1484 :   tree vectype_out = vectype_in;
    1540              : 
    1541         1484 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1542         1484 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1543              :     {
    1544         1123 :       tree mid_type
    1545         1123 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1546         1123 :                                           TYPE_UNSIGNED (abd_in_type));
    1547         1123 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1548              : 
    1549         1123 :       code_helper dummy_code;
    1550         1123 :       int dummy_int;
    1551         1123 :       auto_vec<tree> dummy_vec;
    1552         1123 :       if (mid_vectype
    1553         1123 :           && supportable_widening_operation (IFN_VEC_WIDEN_ABD,
    1554              :                                              mid_vectype, vectype_in, false,
    1555              :                                              &dummy_code, &dummy_code,
    1556              :                                              &dummy_int, &dummy_vec))
    1557              :         {
    1558            0 :           ifn = IFN_VEC_WIDEN_ABD;
    1559            0 :           abd_out_type = mid_type;
    1560            0 :           vectype_out = mid_vectype;
    1561              :         }
    1562         1123 :     }
    1563              : 
    1564         1123 :   if (ifn == IFN_ABD
    1565         1484 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1566              :                                           OPTIMIZE_FOR_SPEED))
    1567              :     return NULL;
    1568              : 
    1569            0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1570              : 
    1571            0 :   tree abd_oprnds[2];
    1572            0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1573              :                        abd_in_type, unprom, vectype_in);
    1574              : 
    1575            0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1576              : 
    1577            0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1578            0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1579              :                                                 abd_oprnds[0], abd_oprnds[1]);
    1580            0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1581            0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1582              : 
    1583            0 :   gimple *stmt = abd_stmt;
    1584            0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1585            0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1586            0 :       && !TYPE_UNSIGNED (abd_out_type))
    1587              :     {
    1588            0 :       tree unsign = unsigned_type_for (abd_out_type);
    1589            0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1590            0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1591              :     }
    1592              : 
    1593            0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1594              : }
    1595              : 
    1596              : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1597              :    so that it can be treated as though it had the form:
    1598              : 
    1599              :       A_TYPE a;
    1600              :       B_TYPE b;
    1601              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1602              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1603              :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1604              :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1605              :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1606              : 
    1607              :    Try to replace the pattern with:
    1608              : 
    1609              :       A_TYPE a;
    1610              :       B_TYPE b;
    1611              :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1612              :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1613              :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1614              :     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
    1615              : 
    1616              :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1617              : 
    1618              :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1619              :    name of the pattern being matched, for dump purposes.  */
    1620              : 
    1621              : static gimple *
    1622    121077370 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1623              :                              stmt_vec_info last_stmt_info, tree *type_out,
    1624              :                              tree_code orig_code, code_helper wide_code,
    1625              :                              bool shift_p, const char *name)
    1626              : {
    1627    121077370 :   gimple *last_stmt = last_stmt_info->stmt;
    1628              : 
    1629    363232110 :   vect_unpromoted_value unprom[2];
    1630    121077370 :   tree half_type;
    1631    121077370 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1632              :                              shift_p, 2, unprom, &half_type))
    1633              : 
    1634              :     return NULL;
    1635              : 
    1636              :   /* Pattern detected.  */
    1637       293353 :   vect_pattern_detected (name, last_stmt);
    1638              : 
    1639       293353 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1640       293353 :   tree itype = type;
    1641       293353 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1642       293353 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1643       203194 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1644       203194 :                                             TYPE_UNSIGNED (half_type));
    1645              : 
    1646              :   /* Check target support  */
    1647       293353 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1648       293353 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1649       293353 :   tree ctype = itype;
    1650       293353 :   tree vecctype = vecitype;
    1651       293353 :   if (orig_code == MINUS_EXPR
    1652         8611 :       && TYPE_UNSIGNED (itype)
    1653       297428 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1654              :     {
    1655              :       /* Subtraction is special, even if half_type is unsigned and no matter
    1656              :          whether type is signed or unsigned, if type is wider than itype,
    1657              :          we need to sign-extend from the widening operation result to the
    1658              :          result type.
    1659              :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1660              :          itype unsigned short and type either int or unsigned int.
    1661              :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1662              :          (unsigned short) 0xffff, but for type int we want the result -1
    1663              :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1664          612 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
    1665          612 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1666              :     }
    1667              : 
    1668       293353 :   code_helper dummy_code;
    1669       293353 :   int dummy_int;
    1670       293353 :   auto_vec<tree> dummy_vec;
    1671       293353 :   if (!vectype
    1672       293353 :       || !vecitype
    1673       229886 :       || !vecctype
    1674       523239 :       || !supportable_widening_operation (wide_code, vecitype, vectype, true,
    1675              :                                           &dummy_code, &dummy_code,
    1676              :                                           &dummy_int, &dummy_vec))
    1677       192660 :     return NULL;
    1678              : 
    1679       100693 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1680       100693 :   if (!*type_out)
    1681              :     return NULL;
    1682              : 
    1683       100693 :   tree oprnd[2];
    1684       100693 :   vect_convert_inputs (vinfo, last_stmt_info,
    1685              :                        2, oprnd, half_type, unprom, vectype);
    1686              : 
    1687       100693 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1688       100693 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1689              : 
    1690       100693 :   if (vecctype != vecitype)
    1691            0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1692              :                                         pattern_stmt, vecitype);
    1693              : 
    1694       100693 :   return vect_convert_output (vinfo, last_stmt_info,
    1695       100693 :                               type, pattern_stmt, vecctype);
    1696       293353 : }
    1697              : 
    1698              : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1699              :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    1700              : 
    1701              : static gimple *
    1702     30145520 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1703              :                                tree *type_out)
    1704              : {
    1705     30145520 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1706     30145520 :                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
    1707     30145520 :                                       "vect_recog_widen_mult_pattern");
    1708              : }
    1709              : 
    1710              : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1711              :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */
    1712              : 
    1713              : static gimple *
    1714     30405606 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1715              :                                tree *type_out)
    1716              : {
    1717     30405606 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1718     30405606 :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
    1719     30405606 :                                       false, "vect_recog_widen_plus_pattern");
    1720              : }
    1721              : 
    1722              : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1723              :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */
    1724              : static gimple *
    1725     30405606 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1726              :                                tree *type_out)
    1727              : {
    1728     30405606 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1729     30405606 :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
    1730     30405606 :                                       false, "vect_recog_widen_minus_pattern");
    1731              : }
    1732              : 
    1733              : /* Try to detect abd on widened inputs, converting IFN_ABD
    1734              :    to IFN_VEC_WIDEN_ABD.  */
    1735              : static gimple *
    1736     30405606 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1737              :                               tree *type_out)
    1738              : {
    1739     30405606 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1740     27863579 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1741              :     return NULL;
    1742              : 
    1743      2919879 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1744              : 
    1745      2919879 :   tree in_type = TREE_TYPE (last_rhs);
    1746      2919879 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1747      2919879 :   if (!INTEGRAL_TYPE_P (in_type)
    1748      2615825 :       || !INTEGRAL_TYPE_P (out_type)
    1749      2510091 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1750      3516042 :       || !TYPE_UNSIGNED (in_type))
    1751              :     return NULL;
    1752              : 
    1753       223346 :   vect_unpromoted_value unprom;
    1754       223346 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1755       223346 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1756              :     return NULL;
    1757              : 
    1758       220616 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1759       220616 :   if (!abd_pattern_vinfo)
    1760              :     return NULL;
    1761              : 
    1762     30414717 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1763         9111 :   if (!abd_stmt
    1764         9111 :       || !gimple_call_internal_p (abd_stmt)
    1765          234 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1766              :     return NULL;
    1767              : 
    1768            0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1769            0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
    1770              : 
    1771            0 :   code_helper dummy_code;
    1772            0 :   int dummy_int;
    1773            0 :   auto_vec<tree> dummy_vec;
    1774            0 :   if (!supportable_widening_operation (IFN_VEC_WIDEN_ABD, vectype_out,
    1775              :                                        vectype_in, false,
    1776              :                                        &dummy_code, &dummy_code,
    1777              :                                        &dummy_int, &dummy_vec))
    1778              :     return NULL;
    1779              : 
    1780            0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1781              : 
    1782            0 :   *type_out = vectype_out;
    1783              : 
    1784            0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1785            0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1786            0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1787            0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1788              :                                                       abd_oprnd0, abd_oprnd1);
    1789            0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1790            0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1791            0 :   return widen_abd_stmt;
    1792            0 : }
    1793              : 
    1794              : /* Function vect_recog_ctz_ffs_pattern
    1795              : 
    1796              :    Try to find the following pattern:
    1797              : 
    1798              :    TYPE1 A;
    1799              :    TYPE1 B;
    1800              : 
    1801              :    B = __builtin_ctz{,l,ll} (A);
    1802              : 
    1803              :    or
    1804              : 
    1805              :    B = __builtin_ffs{,l,ll} (A);
    1806              : 
    1807              :    Input:
    1808              : 
    1809              :    * STMT_VINFO: The stmt from which the pattern search begins.
    1810              :    here it starts with B = __builtin_* (A);
    1811              : 
    1812              :    Output:
    1813              : 
    1814              :    * TYPE_OUT: The vector type of the output of this pattern.
    1815              : 
    1816              :    * Return value: A new stmt that will be used to replace the sequence of
    1817              :    stmts that constitute the pattern, using clz or popcount builtins.  */
    1818              : 
    1819              : static gimple *
    1820     30120530 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1821              :                             tree *type_out)
    1822              : {
    1823     30120530 :   gimple *call_stmt = stmt_vinfo->stmt;
    1824     30120530 :   gimple *pattern_stmt;
    1825     30120530 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1826     30120530 :   tree new_var;
    1827     30120530 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
    1828     30120530 :   bool defined_at_zero = true, defined_at_zero_new = false;
    1829     30120530 :   int val = 0, val_new = 0, val_cmp = 0;
    1830     30120530 :   int prec;
    1831     30120530 :   int sub = 0, add = 0;
    1832     30120530 :   location_t loc;
    1833              : 
    1834     30120530 :   if (!is_gimple_call (call_stmt))
    1835              :     return NULL;
    1836              : 
    1837      3587530 :   if (gimple_call_num_args (call_stmt) != 1
    1838      3587530 :       && gimple_call_num_args (call_stmt) != 2)
    1839              :     return NULL;
    1840              : 
    1841      2005096 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1842      2005096 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1843      2005096 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1844      2005096 :   if (!lhs_oprnd)
    1845              :     return NULL;
    1846       972642 :   lhs_type = TREE_TYPE (lhs_oprnd);
    1847       972642 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1848       330310 :       || !INTEGRAL_TYPE_P (rhs_type)
    1849        48642 :       || !type_has_mode_precision_p (rhs_type)
    1850      1019710 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1851       937220 :     return NULL;
    1852              : 
    1853        35422 :   switch (gimple_call_combined_fn (call_stmt))
    1854              :     {
    1855         1158 :     CASE_CFN_CTZ:
    1856         1158 :       ifn = IFN_CTZ;
    1857         1158 :       if (!gimple_call_internal_p (call_stmt)
    1858         1158 :           || gimple_call_num_args (call_stmt) != 2)
    1859              :         defined_at_zero = false;
    1860              :       else
    1861           48 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1862              :       break;
    1863              :     CASE_CFN_FFS:
    1864              :       ifn = IFN_FFS;
    1865              :       break;
    1866              :     default:
    1867              :       return NULL;
    1868              :     }
    1869              : 
    1870         1324 :   prec = TYPE_PRECISION (rhs_type);
    1871         1324 :   loc = gimple_location (call_stmt);
    1872              : 
    1873         1324 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1874         1324 :   if (!vec_type)
    1875              :     return NULL;
    1876              : 
    1877         1318 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1878         1318 :   if (!vec_rhs_type)
    1879              :     return NULL;
    1880              : 
    1881              :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1882              :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1883              :      popcount<vector_mode>2.  */
    1884         1088 :   if (!vec_type
    1885         1088 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1886              :                                          OPTIMIZE_FOR_SPEED))
    1887              :     return NULL;
    1888              : 
    1889         1088 :   if (ifn == IFN_FFS
    1890         1088 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1891              :                                          OPTIMIZE_FOR_SPEED))
    1892              :     {
    1893            0 :       ifnnew = IFN_CTZ;
    1894            0 :       defined_at_zero_new
    1895            0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1896              :                                      val_new) == 2;
    1897              :     }
    1898         1088 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1899              :                                            OPTIMIZE_FOR_SPEED))
    1900              :     {
    1901           94 :       ifnnew = IFN_CLZ;
    1902           94 :       defined_at_zero_new
    1903           94 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1904              :                                      val_new) == 2;
    1905              :     }
    1906           94 :   if ((ifnnew == IFN_LAST
    1907           94 :        || (defined_at_zero && !defined_at_zero_new))
    1908          994 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1909              :                                          OPTIMIZE_FOR_SPEED))
    1910              :     {
    1911              :       ifnnew = IFN_POPCOUNT;
    1912              :       defined_at_zero_new = true;
    1913              :       val_new = prec;
    1914              :     }
    1915         1052 :   if (ifnnew == IFN_LAST)
    1916              :     return NULL;
    1917              : 
    1918          130 :   vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
    1919              : 
    1920          130 :   val_cmp = val_new;
    1921          130 :   if ((ifnnew == IFN_CLZ
    1922          130 :        && defined_at_zero
    1923           60 :        && defined_at_zero_new
    1924           60 :        && val == prec
    1925           31 :        && val_new == prec)
    1926           99 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1927              :     {
    1928           56 :       if (vect_is_reduction (stmt_vinfo))
    1929              :         return NULL;
    1930              : 
    1931              :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1932              :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1933           56 :       if (ifnnew == IFN_CLZ)
    1934           31 :         sub = prec;
    1935           56 :       val_cmp = prec;
    1936              : 
    1937           56 :       if (!TYPE_UNSIGNED (rhs_type))
    1938              :         {
    1939           12 :           rhs_type = unsigned_type_for (rhs_type);
    1940           12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1941           12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1942           12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1943           12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1944              :                                   vec_rhs_type);
    1945           12 :           rhs_oprnd = new_var;
    1946              :         }
    1947              : 
    1948           56 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
    1949           56 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1950              :                                           build_int_cst (rhs_type, -1));
    1951           56 :       gimple_set_location (pattern_stmt, loc);
    1952           56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1953              : 
    1954           56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1955           56 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1956           56 :       gimple_set_location (pattern_stmt, loc);
    1957           56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1958           56 :       rhs_oprnd = new_var;
    1959              : 
    1960           56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1961           56 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1962              :                                           m1, rhs_oprnd);
    1963           56 :       gimple_set_location (pattern_stmt, loc);
    1964           56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1965           56 :       rhs_oprnd = new_var;
    1966           56 :     }
    1967           74 :   else if (ifnnew == IFN_CLZ)
    1968              :     {
    1969           63 :       if (vect_is_reduction (stmt_vinfo))
    1970              :         return NULL;
    1971              : 
    1972              :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1973              :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1974           63 :       sub = prec - (ifn == IFN_CTZ);
    1975           63 :       val_cmp = sub - val_new;
    1976              : 
    1977           63 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1978           63 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1979           63 :       gimple_set_location (pattern_stmt, loc);
    1980           63 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1981              : 
    1982           63 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1983           63 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1984              :                                           rhs_oprnd, neg);
    1985           63 :       gimple_set_location (pattern_stmt, loc);
    1986           63 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1987           63 :       rhs_oprnd = new_var;
    1988              :     }
    1989           11 :   else if (ifnnew == IFN_POPCOUNT)
    1990              :     {
    1991           11 :       if (vect_is_reduction (stmt_vinfo))
    1992              :         return NULL;
    1993              : 
    1994              :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1995              :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
    1996           11 :       sub = prec + (ifn == IFN_FFS);
    1997           11 :       val_cmp = sub;
    1998              : 
    1999           11 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    2000           11 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    2001           11 :       gimple_set_location (pattern_stmt, loc);
    2002           11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2003              : 
    2004           11 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    2005           11 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    2006              :                                           rhs_oprnd, neg);
    2007           11 :       gimple_set_location (pattern_stmt, loc);
    2008           11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    2009           11 :       rhs_oprnd = new_var;
    2010              :     }
    2011            0 :   else if (ifnnew == IFN_CTZ)
    2012              :     {
    2013              :       /* .FFS (X) = .CTZ (X) + 1.  */
    2014            0 :       add = 1;
    2015            0 :       val_cmp++;
    2016              : 
    2017            0 :       if (vect_is_reduction (stmt_vinfo)
    2018            0 :           && defined_at_zero
    2019            0 :           && (!defined_at_zero_new || val != val_cmp))
    2020              :         return NULL;
    2021              :     }
    2022              : 
    2023              :   /* Create B = .IFNNEW (A).  */
    2024          130 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2025          130 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
    2026           94 :     pattern_stmt
    2027           94 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    2028              :                                     build_int_cst (integer_type_node,
    2029           94 :                                                    val_new));
    2030              :   else
    2031           36 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    2032          130 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2033          130 :   gimple_set_location (pattern_stmt, loc);
    2034          130 :   *type_out = vec_type;
    2035              : 
    2036          130 :   if (sub)
    2037              :     {
    2038          105 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2039          105 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2040          105 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2041          105 :                                           build_int_cst (lhs_type, sub),
    2042              :                                           new_var);
    2043          105 :       gimple_set_location (pattern_stmt, loc);
    2044          105 :       new_var = ret_var;
    2045              :     }
    2046           25 :   else if (add)
    2047              :     {
    2048            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2049            0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2050            0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2051            0 :                                           build_int_cst (lhs_type, add));
    2052            0 :       gimple_set_location (pattern_stmt, loc);
    2053            0 :       new_var = ret_var;
    2054              :     }
    2055              : 
    2056          130 :   if (defined_at_zero
    2057           88 :       && (!defined_at_zero_new || val != val_cmp))
    2058              :     {
    2059           11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2060           11 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2061           11 :       rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2062           11 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2063           11 :       tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2064           11 :       pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
    2065              :                                           build_zero_cst (rhs_type));
    2066           11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    2067              :                               truth_type_for (vec_type), rhs_type);
    2068           11 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2069              :                                           new_var,
    2070           11 :                                           build_int_cst (lhs_type, val));
    2071              :     }
    2072              : 
    2073          130 :   if (dump_enabled_p ())
    2074           36 :     dump_printf_loc (MSG_NOTE, vect_location,
    2075              :                      "created pattern stmt: %G", pattern_stmt);
    2076              : 
    2077              :   return pattern_stmt;
    2078              : }
    2079              : 
    2080              : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2081              : 
    2082              :    Try to find the following pattern:
    2083              : 
    2084              :    UTYPE1 A;
    2085              :    TYPE1 B;
    2086              :    UTYPE2 temp_in;
    2087              :    TYPE3 temp_out;
    2088              :    temp_in = (UTYPE2)A;
    2089              : 
    2090              :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2091              :    B = (TYPE1) temp_out;
    2092              : 
    2093              :    TYPE2 may or may not be equal to TYPE3.
    2094              :    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
    2095              :    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
    2096              : 
    2097              :    Input:
    2098              : 
    2099              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2100              :    here it starts with B = (TYPE1) temp_out;
    2101              : 
    2102              :    Output:
    2103              : 
    2104              :    * TYPE_OUT: The vector type of the output of this pattern.
    2105              : 
    2106              :    * Return value: A new stmt that will be used to replace the sequence of
    2107              :    stmts that constitute the pattern. In this case it will be:
    2108              :    B = .POPCOUNT (A);
    2109              : 
    2110              :    Similarly for clz, ctz and ffs.
    2111              : */
    2112              : 
    2113              : static gimple *
    2114     30120410 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2115              :                                          stmt_vec_info stmt_vinfo,
    2116              :                                          tree *type_out)
    2117              : {
    2118     30120410 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2119     20375535 :   gimple *call_stmt, *pattern_stmt;
    2120     20375535 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2121     50495786 :   internal_fn ifn = IFN_LAST;
    2122     30120251 :   int addend = 0;
    2123              : 
    2124              :   /* Find B = (TYPE1) temp_out. */
    2125     20375535 :   if (!last_stmt)
    2126              :     return NULL;
    2127     20375535 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2128     20375535 :   if (!CONVERT_EXPR_CODE_P (code))
    2129              :     return NULL;
    2130              : 
    2131      2800467 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2132      2800467 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2133      2800467 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2134              :     return NULL;
    2135              : 
    2136      2631794 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2137      2631794 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2138      2631794 :       || !has_single_use (rhs_oprnd))
    2139              :     return NULL;
    2140      1350457 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2141              : 
    2142              :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
    2143      1350457 :   if (!is_gimple_call (call_stmt))
    2144              :     return NULL;
    2145       102416 :   switch (gimple_call_combined_fn (call_stmt))
    2146              :     {
    2147              :       int val;
    2148              :     CASE_CFN_POPCOUNT:
    2149              :       ifn = IFN_POPCOUNT;
    2150              :       break;
    2151         3306 :     CASE_CFN_CLZ:
    2152         3306 :       ifn = IFN_CLZ;
    2153              :       /* Punt if call result is unsigned and defined value at zero
    2154              :          is negative, as the negative value doesn't extend correctly.  */
    2155         3306 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2156            0 :           && gimple_call_internal_p (call_stmt)
    2157         3306 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2158              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2159         3306 :           && val < 0)
    2160              :         return NULL;
    2161              :       break;
    2162          591 :     CASE_CFN_CTZ:
    2163          591 :       ifn = IFN_CTZ;
    2164              :       /* Punt if call result is unsigned and defined value at zero
    2165              :          is negative, as the negative value doesn't extend correctly.  */
    2166          591 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2167            0 :           && gimple_call_internal_p (call_stmt)
    2168          591 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2169              :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2170          591 :           && val < 0)
    2171              :         return NULL;
    2172              :       break;
    2173           18 :     CASE_CFN_FFS:
    2174           18 :       ifn = IFN_FFS;
    2175           18 :       break;
    2176              :     default:
    2177              :       return NULL;
    2178              :     }
    2179              : 
    2180         4145 :   if (gimple_call_num_args (call_stmt) != 1
    2181         4145 :       && gimple_call_num_args (call_stmt) != 2)
    2182              :     return NULL;
    2183              : 
    2184         4145 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2185         4145 :   vect_unpromoted_value unprom_diff;
    2186         4145 :   rhs_origin
    2187         4145 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2188              : 
    2189         4145 :   if (!rhs_origin)
    2190              :     return NULL;
    2191              : 
    2192              :   /* Input and output of .POPCOUNT should be same-precision integer.  */
    2193         4135 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2194              :     return NULL;
    2195              : 
    2196              :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2197              :      different builtins/internal functions have different behaviors.  */
    2198         1604 :   if (TYPE_PRECISION (unprom_diff.type)
    2199         1604 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2200          158 :     switch (ifn)
    2201              :       {
    2202           79 :       case IFN_POPCOUNT:
    2203              :         /* For popcount require zero extension, which doesn't add any
    2204              :            further bits to the count.  */
    2205           79 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2206              :           return NULL;
    2207              :         break;
    2208           61 :       case IFN_CLZ:
    2209              :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2210              :            if it is undefined at zero or if it matches also for the
    2211              :            defined value there.  */
    2212           61 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2213              :           return NULL;
    2214           61 :         if (!type_has_mode_precision_p (lhs_type)
    2215           61 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2216            0 :           return NULL;
    2217           61 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2218           61 :                   - TYPE_PRECISION (lhs_type));
    2219           61 :         if (gimple_call_internal_p (call_stmt)
    2220           61 :             && gimple_call_num_args (call_stmt) == 2)
    2221              :           {
    2222            0 :             int val1, val2;
    2223            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2224            0 :             int d2
    2225            0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2226              :                                            val2);
    2227            0 :             if (d2 != 2 || val1 != val2 + addend)
    2228              :               return NULL;
    2229              :           }
    2230              :         break;
    2231           13 :       case IFN_CTZ:
    2232              :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2233              :            if it is undefined at zero or if it matches also for the
    2234              :            defined value there.  */
    2235           13 :         if (gimple_call_internal_p (call_stmt)
    2236           13 :             && gimple_call_num_args (call_stmt) == 2)
    2237              :           {
    2238            0 :             int val1, val2;
    2239            0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2240            0 :             int d2
    2241            0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2242              :                                            val2);
    2243            0 :             if (d2 != 2 || val1 != val2)
    2244              :               return NULL;
    2245              :           }
    2246              :         break;
    2247              :       case IFN_FFS:
    2248              :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2249              :         break;
    2250            0 :       default:
    2251            0 :         gcc_unreachable ();
    2252              :       }
    2253              : 
    2254         1604 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2255              :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2256         1604 :   if (!vec_type)
    2257              :     return NULL;
    2258              : 
    2259         1486 :   bool supported
    2260         1486 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2261         1486 :   if (!supported)
    2262         1383 :     switch (ifn)
    2263              :       {
    2264              :       case IFN_POPCOUNT:
    2265              :       case IFN_CLZ:
    2266              :         return NULL;
    2267           18 :       case IFN_FFS:
    2268              :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2269           18 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2270              :                                             OPTIMIZE_FOR_SPEED))
    2271              :           break;
    2272              :         /* FALLTHRU */
    2273          378 :       case IFN_CTZ:
    2274              :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2275              :            clz or popcount.  */
    2276          378 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2277              :                                             OPTIMIZE_FOR_SPEED))
    2278              :           break;
    2279          344 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2280              :                                             OPTIMIZE_FOR_SPEED))
    2281              :           break;
    2282              :         return NULL;
    2283            0 :       default:
    2284            0 :         gcc_unreachable ();
    2285              :       }
    2286              : 
    2287          159 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2288              :                          call_stmt);
    2289              : 
    2290              :   /* Create B = .POPCOUNT (A).  */
    2291          159 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2292          159 :   tree arg2 = NULL_TREE;
    2293          159 :   int val;
    2294          159 :   if (ifn == IFN_CLZ
    2295          191 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2296              :                                     val) == 2)
    2297           30 :     arg2 = build_int_cst (integer_type_node, val);
    2298          129 :   else if (ifn == IFN_CTZ
    2299          167 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2300              :                                          val) == 2)
    2301           38 :     arg2 = build_int_cst (integer_type_node, val);
    2302          159 :   if (arg2)
    2303           68 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2304              :   else
    2305           91 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2306          159 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2307          159 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2308          159 :   *type_out = vec_type;
    2309              : 
    2310          159 :   if (dump_enabled_p ())
    2311           24 :     dump_printf_loc (MSG_NOTE, vect_location,
    2312              :                      "created pattern stmt: %G", pattern_stmt);
    2313              : 
    2314          159 :   if (addend)
    2315              :     {
    2316            6 :       gcc_assert (supported);
    2317            6 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2318            6 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2319            6 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2320            6 :                                           build_int_cst (lhs_type, addend));
    2321              :     }
    2322          153 :   else if (!supported)
    2323              :     {
    2324           56 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2325           56 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2326           56 :       pattern_stmt
    2327           56 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2328           56 :       if (pattern_stmt == NULL)
    2329              :         return NULL;
    2330           56 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2331              :         {
    2332           56 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2333           56 :           gimple_seq_add_seq_without_update (pseq, seq);
    2334              :         }
    2335              :     }
    2336              :   return pattern_stmt;
    2337              : }
    2338              : 
    2339              : /* Function vect_recog_pow_pattern
    2340              : 
    2341              :    Try to find the following pattern:
    2342              : 
    2343              :      x = POW (y, N);
    2344              : 
    2345              :    with POW being one of pow, powf, powi, powif and N being
    2346              :    either 2 or 0.5.
    2347              : 
    2348              :    Input:
    2349              : 
    2350              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2351              : 
    2352              :    Output:
    2353              : 
    2354              :    * TYPE_OUT: The type of the output of this pattern.
    2355              : 
    2356              :    * Return value: A new stmt that will be used to replace the sequence of
    2357              :    stmts that constitute the pattern. In this case it will be:
    2358              :         x = x * x
    2359              :    or
    2360              :         x = sqrt (x)
    2361              : */
    2362              : 
    2363              : static gimple *
    2364     30120425 : vect_recog_pow_pattern (vec_info *vinfo,
    2365              :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2366              : {
    2367     30120425 :   gimple *last_stmt = stmt_vinfo->stmt;
    2368     30120425 :   tree base, exp;
    2369     30120425 :   gimple *stmt;
    2370     30120425 :   tree var;
    2371              : 
    2372     30120425 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2373              :     return NULL;
    2374              : 
    2375      1499189 :   switch (gimple_call_combined_fn (last_stmt))
    2376              :     {
    2377          266 :     CASE_CFN_POW:
    2378          266 :     CASE_CFN_POWI:
    2379          266 :       break;
    2380              : 
    2381              :     default:
    2382              :       return NULL;
    2383              :     }
    2384              : 
    2385          266 :   base = gimple_call_arg (last_stmt, 0);
    2386          266 :   exp = gimple_call_arg (last_stmt, 1);
    2387          266 :   if (TREE_CODE (exp) != REAL_CST
    2388          239 :       && TREE_CODE (exp) != INTEGER_CST)
    2389              :     {
    2390          239 :       if (flag_unsafe_math_optimizations
    2391           27 :           && TREE_CODE (base) == REAL_CST
    2392          241 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2393              :         {
    2394            2 :           combined_fn log_cfn;
    2395            2 :           built_in_function exp_bfn;
    2396            2 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2397              :             {
    2398              :             case BUILT_IN_POW:
    2399              :               log_cfn = CFN_BUILT_IN_LOG;
    2400              :               exp_bfn = BUILT_IN_EXP;
    2401              :               break;
    2402            0 :             case BUILT_IN_POWF:
    2403            0 :               log_cfn = CFN_BUILT_IN_LOGF;
    2404            0 :               exp_bfn = BUILT_IN_EXPF;
    2405            0 :               break;
    2406            0 :             case BUILT_IN_POWL:
    2407            0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2408            0 :               exp_bfn = BUILT_IN_EXPL;
    2409            0 :               break;
    2410              :             default:
    2411              :               return NULL;
    2412              :             }
    2413            2 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2414            2 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2415              :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2416              :              does that, but if C is a power of 2, we want to use
    2417              :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2418              :              vectorization we don't have vectorized exp2.  */
    2419            2 :           if (logc
    2420            2 :               && TREE_CODE (logc) == REAL_CST
    2421            2 :               && exp_decl
    2422            4 :               && lookup_attribute ("omp declare simd",
    2423            2 :                                    DECL_ATTRIBUTES (exp_decl)))
    2424              :             {
    2425            2 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2426            2 :               if (node->simd_clones == NULL)
    2427              :                 {
    2428            2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2429            2 :                       || node->definition)
    2430              :                     return NULL;
    2431            2 :                   expand_simd_clones (node);
    2432            2 :                   if (node->simd_clones == NULL)
    2433              :                     return NULL;
    2434              :                 }
    2435            2 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2436            2 :               if (!*type_out)
    2437              :                 return NULL;
    2438            2 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2439            2 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
    2440            2 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2441            2 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2442            2 :               g = gimple_build_call (exp_decl, 1, def);
    2443            2 :               gimple_call_set_lhs (g, res);
    2444            2 :               return g;
    2445              :             }
    2446              :         }
    2447              : 
    2448          237 :       return NULL;
    2449              :     }
    2450              : 
    2451              :   /* We now have a pow or powi builtin function call with a constant
    2452              :      exponent.  */
    2453              : 
    2454              :   /* Catch squaring.  */
    2455           27 :   if ((tree_fits_shwi_p (exp)
    2456            0 :        && tree_to_shwi (exp) == 2)
    2457           27 :       || (TREE_CODE (exp) == REAL_CST
    2458           27 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2459              :     {
    2460            7 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2461            7 :                                             TREE_TYPE (base), type_out))
    2462              :         return NULL;
    2463              : 
    2464            7 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2465            7 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2466            7 :       return stmt;
    2467              :     }
    2468              : 
    2469              :   /* Catch square root.  */
    2470           20 :   if (TREE_CODE (exp) == REAL_CST
    2471           20 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2472              :     {
    2473           10 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2474           10 :       if (*type_out
    2475           10 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2476              :                                              OPTIMIZE_FOR_SPEED))
    2477              :         {
    2478            8 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2479            8 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2480            8 :           gimple_call_set_lhs (stmt, var);
    2481            8 :           gimple_call_set_nothrow (stmt, true);
    2482            8 :           return stmt;
    2483              :         }
    2484              :     }
    2485              : 
    2486              :   return NULL;
    2487              : }
    2488              : 
    2489              : 
    2490              : /* Function vect_recog_widen_sum_pattern
    2491              : 
    2492              :    Try to find the following pattern:
    2493              : 
    2494              :      type x_t;
    2495              :      TYPE x_T, sum = init;
    2496              :    loop:
    2497              :      sum_0 = phi <init, sum_1>
    2498              :      S1  x_t = *p;
    2499              :      S2  x_T = (TYPE) x_t;
    2500              :      S3  sum_1 = x_T + sum_0;
    2501              : 
    2502              :    where type 'TYPE' is at least double the size of type 'type', i.e - we're
    2503              :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2504              :    a special case of a reduction computation.
    2505              : 
    2506              :    Input:
    2507              : 
    2508              :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2509              :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2510              : 
    2511              :    Output:
    2512              : 
    2513              :    * TYPE_OUT: The type of the output of this pattern.
    2514              : 
    2515              :    * Return value: A new stmt that will be used to replace the sequence of
    2516              :    stmts that constitute the pattern. In this case it will be:
    2517              :         WIDEN_SUM <x_t, sum_0>
    2518              : 
    2519              :    Note: The widening-sum idiom is a widening reduction pattern that is
    2520              :          vectorized without preserving all the intermediate results. It
    2521              :          produces only N/2 (widened) results (by summing up pairs of
    2522              :          intermediate results) rather than all N results.  Therefore, we
    2523              :          cannot allow this pattern when we want to get all the results and in
    2524              :          the correct order (as is the case when this computation is in an
    2525              :          inner-loop nested in an outer-loop that us being vectorized).  */
    2526              : 
    2527              : static gimple *
    2528     30120425 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2529              :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2530              : {
    2531     30120425 :   gimple *last_stmt = stmt_vinfo->stmt;
    2532     30120425 :   tree oprnd0, oprnd1;
    2533     30120425 :   tree type;
    2534     30120425 :   gimple *pattern_stmt;
    2535     30120425 :   tree var;
    2536              : 
    2537              :   /* Look for the following pattern
    2538              :           DX = (TYPE) X;
    2539              :           sum_1 = DX + sum_0;
    2540              :      In which DX is at least double the size of X, and sum_1 has been
    2541              :      recognized as a reduction variable.
    2542              :    */
    2543              : 
    2544              :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2545              :      of the above pattern.  */
    2546              : 
    2547     30120425 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2548              :                                        &oprnd0, &oprnd1)
    2549        35345 :       || TREE_CODE (oprnd0) != SSA_NAME
    2550     30155533 :       || !vinfo->lookup_def (oprnd0))
    2551     30085380 :     return NULL;
    2552              : 
    2553        35045 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2554              : 
    2555              :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2556              :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2557              :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2558              :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    2559              :      'TYPE'.  */
    2560              : 
    2561        35045 :   vect_unpromoted_value unprom0;
    2562        35045 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2563        35045 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    2564              :     return NULL;
    2565              : 
    2566         1736 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2567              : 
    2568         1736 :   if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2569              :                                       unprom0.type, type_out))
    2570              :     return NULL;
    2571              : 
    2572            0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2573            0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    2574              : 
    2575            0 :   return pattern_stmt;
    2576              : }
    2577              : 
    2578              : /* Function vect_recog_bitfield_ref_pattern
    2579              : 
    2580              :    Try to find the following pattern:
    2581              : 
    2582              :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2583              :    result = (type_out) bf_value;
    2584              : 
    2585              :    or
    2586              : 
    2587              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2588              : 
    2589              :    where type_out is a non-bitfield type, that is to say, it's precision matches
    2590              :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2591              : 
    2592              :    Input:
    2593              : 
    2594              :    * STMT_VINFO: The stmt from which the pattern search begins.
    2595              :    here it starts with:
    2596              :    result = (type_out) bf_value;
    2597              : 
    2598              :    or
    2599              : 
    2600              :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2601              : 
    2602              :    Output:
    2603              : 
    2604              :    * TYPE_OUT: The vector type of the output of this pattern.
    2605              : 
    2606              :    * Return value: A new stmt that will be used to replace the sequence of
    2607              :    stmts that constitute the pattern. If the precision of type_out is bigger
    2608              :    than the precision type of _1 we perform the widening before the shifting,
    2609              :    since the new precision will be large enough to shift the value and moving
    2610              :    widening operations up the statement chain enables the generation of
    2611              :    widening loads.  If we are widening and the operation after the pattern is
    2612              :    an addition then we mask first and shift later, to enable the generation of
    2613              :    shifting adds.  In the case of narrowing we will always mask first, shift
    2614              :    last and then perform a narrowing operation.  This will enable the
    2615              :    generation of narrowing shifts.
    2616              : 
    2617              :    Widening with mask first, shift later:
    2618              :    container = (type_out) container;
    2619              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2620              :    result = masked >> bitpos;
    2621              : 
    2622              :    Widening with shift first, mask last:
    2623              :    container = (type_out) container;
    2624              :    shifted = container >> bitpos;
    2625              :    result = shifted & ((1 << bitsize) - 1);
    2626              : 
    2627              :    Narrowing:
    2628              :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2629              :    result = masked >> bitpos;
    2630              :    result = (type_out) result;
    2631              : 
    2632              :    If the bitfield is signed and it's wider than type_out, we need to
    2633              :    keep the result sign-extended:
    2634              :    container = (type) container;
    2635              :    masked = container << (prec - bitsize - bitpos);
    2636              :    result = (type_out) (masked >> (prec - bitsize));
    2637              : 
    2638              :    Here type is the signed variant of the wider of type_out and the type
    2639              :    of container.
    2640              : 
    2641              :    The shifting is always optional depending on whether bitpos != 0.
    2642              : 
    2643              :    When the original bitfield was inside a gcond then an new gcond is also
    2644              :    generated with the newly `result` as the operand to the comparison.
    2645              : 
    2646              : */
    2647              : 
    2648              : static gimple *
    2649     30070654 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2650              :                                  tree *type_out)
    2651              : {
    2652     30070654 :   gimple *bf_stmt = NULL;
    2653     30070654 :   tree lhs = NULL_TREE;
    2654     30070654 :   tree ret_type = NULL_TREE;
    2655     30070654 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2656     30070654 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2657              :     {
    2658      5151963 :       tree op = gimple_cond_lhs (cond_stmt);
    2659      5151963 :       if (TREE_CODE (op) != SSA_NAME)
    2660              :         return NULL;
    2661      5151662 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2662      5151662 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2663              :         return NULL;
    2664              :     }
    2665     24918691 :   else if (is_gimple_assign (stmt)
    2666     20325369 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2667     27648510 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2668              :     {
    2669      2689219 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2670      2689219 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2671      2689219 :       lhs = gimple_assign_lhs (stmt);
    2672      2689219 :       ret_type = TREE_TYPE (lhs);
    2673              :     }
    2674              : 
    2675      6029768 :   if (!bf_stmt
    2676      6029768 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2677              :     return NULL;
    2678              : 
    2679        14775 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2680        14775 :   tree container = TREE_OPERAND (bf_ref, 0);
    2681        14775 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);
    2682              : 
    2683        14775 :   if (!bit_field_offset (bf_ref).is_constant ()
    2684        14775 :       || !bit_field_size (bf_ref).is_constant ()
    2685        14775 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2686              :     return NULL;
    2687              : 
    2688        29172 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2689        14773 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2690        16929 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2691        12621 :     return NULL;
    2692              : 
    2693         2154 :   gimple *use_stmt, *pattern_stmt;
    2694         2154 :   use_operand_p use_p;
    2695         2154 :   bool shift_first = true;
    2696         2154 :   tree container_type = TREE_TYPE (container);
    2697         2154 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2698              : 
    2699              :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2700              :      the container may change and we have to consider offset change for
    2701              :      widening loads on big endianness.  The shift_n calculated here can be
    2702              :      independent of widening.  */
    2703         2154 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2704         2154 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2705         2154 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2706         2154 :   if (BYTES_BIG_ENDIAN)
    2707              :     shift_n = prec - shift_n - mask_width;
    2708              : 
    2709         2154 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2710         1394 :                    TYPE_PRECISION (ret_type) > mask_width);
    2711         2154 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2712         2154 :                      TYPE_PRECISION (ret_type));
    2713              : 
    2714              :   /* We move the conversion earlier if the loaded type is smaller than the
    2715              :      return type to enable the use of widening loads.  And if we need a
    2716              :      sign extension, we need to convert the loaded value early to a signed
    2717              :      type as well.  */
    2718         2154 :   if (ref_sext || load_widen)
    2719              :     {
    2720          941 :       tree type = load_widen ? ret_type : container_type;
    2721          941 :       if (ref_sext)
    2722          902 :         type = gimple_signed_type (type);
    2723          941 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2724              :                                           NOP_EXPR, container);
    2725          941 :       container = gimple_get_lhs (pattern_stmt);
    2726          941 :       container_type = TREE_TYPE (container);
    2727          941 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2728          941 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2729          941 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2730              :     }
    2731         1213 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2732              :     /* If we are doing the conversion last then also delay the shift as we may
    2733              :        be able to combine the shift and conversion in certain cases.  */
    2734              :     shift_first = false;
    2735              : 
    2736              :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2737              :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2738              :      add into a single instruction.  */
    2739         1413 :   if (lhs && !is_pattern_stmt_p (stmt_info)
    2740         3567 :       && single_imm_use (lhs, &use_p, &use_stmt))
    2741              :     {
    2742         1049 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2743         1049 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2744              :         shift_first = false;
    2745              :     }
    2746              : 
    2747              :   /* If we don't have to shift we only generate the mask, so just fix the
    2748              :      code-path to shift_first.  */
    2749         2154 :   if (shift_n == 0)
    2750          753 :     shift_first = true;
    2751              : 
    2752         2154 :   tree result;
    2753         2154 :   if (shift_first && !ref_sext)
    2754              :     {
    2755          500 :       tree shifted = container;
    2756          500 :       if (shift_n)
    2757              :         {
    2758           59 :           pattern_stmt
    2759           59 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2760              :                                    RSHIFT_EXPR, container,
    2761           59 :                                    build_int_cst (sizetype, shift_n));
    2762           59 :           shifted = gimple_assign_lhs (pattern_stmt);
    2763           59 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2764              :         }
    2765              : 
    2766          500 :       tree mask = wide_int_to_tree (container_type,
    2767          500 :                                     wi::mask (mask_width, false, prec));
    2768              : 
    2769          500 :       pattern_stmt
    2770          500 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2771              :                                BIT_AND_EXPR, shifted, mask);
    2772          500 :       result = gimple_assign_lhs (pattern_stmt);
    2773              :     }
    2774              :   else
    2775              :     {
    2776         1654 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2777         1654 :       if (!ref_sext)
    2778              :         {
    2779          752 :           tree mask = wide_int_to_tree (container_type,
    2780          752 :                                         wi::shifted_mask (shift_n,
    2781              :                                                           mask_width,
    2782              :                                                           false, prec));
    2783          752 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2784              :                                               container, mask);
    2785              :         }
    2786              :       else
    2787              :         {
    2788          902 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;
    2789          902 :           shift_n += shl;
    2790          902 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2791              :                                               container,
    2792              :                                               build_int_cst (sizetype,
    2793          902 :                                                              shl));
    2794              :         }
    2795              : 
    2796         1654 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2797         1654 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2798         1654 :       pattern_stmt
    2799         1654 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2800              :                                RSHIFT_EXPR, masked,
    2801         1654 :                                build_int_cst (sizetype, shift_n));
    2802         1654 :       result = gimple_assign_lhs (pattern_stmt);
    2803              :     }
    2804              : 
    2805         2154 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2806              :     {
    2807         1435 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2808         1435 :       pattern_stmt
    2809         1435 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2810              :                                NOP_EXPR, result);
    2811              :     }
    2812              : 
    2813         2154 :   if (!lhs)
    2814              :     {
    2815          741 :       if (!vectype)
    2816              :         return NULL;
    2817              : 
    2818          603 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2819          603 :       vectype = truth_type_for (vectype);
    2820              : 
    2821              :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2822              :                 same way as vect_recog_gcond_pattern does.  However because
    2823              :                 patterns cannot match the same root twice,  when we handle and
    2824              :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2825              :                 apply anymore.  We should really fix it so that we don't need to
    2826              :                 duplicate transformations like these.  */
    2827          603 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2828          603 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2829          603 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2830          603 :       gimple *new_stmt
    2831          603 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2832              :                                gimple_get_lhs (pattern_stmt),
    2833              :                                fold_convert (container_type, cond_cst));
    2834          603 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2835          603 :       pattern_stmt
    2836          603 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2837          603 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2838              :                              NULL_TREE, NULL_TREE);
    2839              :     }
    2840              : 
    2841         2016 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2842         2016 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2843              : 
    2844         2016 :   return pattern_stmt;
    2845              : }
    2846              : 
    2847              : /* Function vect_recog_bit_insert_pattern
    2848              : 
    2849              :    Try to find the following pattern:
    2850              : 
    2851              :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2852              : 
    2853              :    Input:
    2854              : 
    2855              :    * STMT_VINFO: The stmt we want to replace.
    2856              : 
    2857              :    Output:
    2858              : 
    2859              :    * TYPE_OUT: The vector type of the output of this pattern.
    2860              : 
    2861              :    * Return value: A new stmt that will be used to replace the sequence of
    2862              :    stmts that constitute the pattern. In this case it will be:
    2863              :    value = (container_type) value;          // Make sure
    2864              :    shifted = value << bitpos;                 // Shift value into place
    2865              :    masked = shifted & (mask << bitpos);           // Mask off the non-relevant bits in
    2866              :                                             // the 'to-write value'.
    2867              :    cleared = container & ~(mask << bitpos); // Clearing the bits we want to
    2868              :                                             // write to from the value we want
    2869              :                                             // to write to.
    2870              :    written = cleared | masked;              // Write bits.
    2871              : 
    2872              : 
    2873              :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2874              :    bits corresponding to the real size of the bitfield value we are writing to.
    2875              :    The shifting is always optional depending on whether bitpos != 0.
    2876              : 
    2877              : */
    2878              : 
    2879              : static gimple *
    2880     30073664 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2881              :                                tree *type_out)
    2882              : {
    2883     30073664 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2884     27174081 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2885              :     return NULL;
    2886              : 
    2887          567 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2888          567 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2889          567 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2890              : 
    2891          567 :   tree bf_type = TREE_TYPE (value);
    2892          567 :   tree container_type = TREE_TYPE (container);
    2893              : 
    2894          567 :   if (!INTEGRAL_TYPE_P (container_type)
    2895          567 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2896              :     return NULL;
    2897              : 
    2898          470 :   gimple *pattern_stmt;
    2899              : 
    2900          470 :   vect_unpromoted_value unprom;
    2901          470 :   unprom.set_op (value, vect_internal_def);
    2902          470 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2903              :                               get_vectype_for_scalar_type (vinfo,
    2904              :                                                            container_type));
    2905              : 
    2906          470 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
    2907          470 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2908          470 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2909          470 :   if (BYTES_BIG_ENDIAN)
    2910              :     {
    2911              :       shift_n = prec - shift_n - mask_width;
    2912              :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2913              :     }
    2914              : 
    2915          470 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2916              :     {
    2917            0 :       pattern_stmt =
    2918            0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2919              :                              NOP_EXPR, value);
    2920            0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2921            0 :       value = gimple_get_lhs (pattern_stmt);
    2922              :     }
    2923              : 
    2924              :   /* Shift VALUE into place.  */
    2925          470 :   tree shifted = value;
    2926          470 :   if (shift_n)
    2927              :     {
    2928          249 :       gimple_seq stmts = NULL;
    2929          249 :       shifted
    2930          249 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2931          249 :       if (!gimple_seq_empty_p (stmts))
    2932          112 :         append_pattern_def_seq (vinfo, stmt_info,
    2933              :                                 gimple_seq_first_stmt (stmts));
    2934              :     }
    2935              : 
    2936          470 :   tree mask_t
    2937          470 :     = wide_int_to_tree (container_type,
    2938          470 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2939              : 
    2940              :   /* Clear bits we don't want to write back from SHIFTED.  */
    2941          470 :   gimple_seq stmts = NULL;
    2942          470 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2943              :                               mask_t);
    2944          470 :   if (!gimple_seq_empty_p (stmts))
    2945              :     {
    2946          110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2947          110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2948              :     }
    2949              : 
    2950              :   /* Mask off the bits in the container that we are to write to.  */
    2951          470 :   mask_t = wide_int_to_tree (container_type,
    2952          470 :                              wi::shifted_mask (shift_n, mask_width, true, prec));
    2953          470 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2954          470 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2955          470 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2956              : 
    2957              :   /* Write MASKED into CLEARED.  */
    2958          470 :   pattern_stmt
    2959          470 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2960              :                            BIT_IOR_EXPR, cleared, masked);
    2961              : 
    2962          470 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2963          470 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2964              : 
    2965          470 :   return pattern_stmt;
    2966              : }
    2967              : 
    2968              : 
    2969              : /* Recognize cases in which an operation is performed in one type WTYPE
    2970              :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2971              :    if we have:
    2972              : 
    2973              :      ATYPE a;  // narrower than NTYPE
    2974              :      BTYPE b;  // narrower than NTYPE
    2975              :      WTYPE aw = (WTYPE) a;
    2976              :      WTYPE bw = (WTYPE) b;
    2977              :      WTYPE res = aw + bw;  // only uses of aw and bw
    2978              : 
    2979              :    then it would be more efficient to do:
    2980              : 
    2981              :      NTYPE an = (NTYPE) a;
    2982              :      NTYPE bn = (NTYPE) b;
    2983              :      NTYPE resn = an + bn;
    2984              :      WTYPE res = (WTYPE) resn;
    2985              : 
    2986              :    Other situations include things like:
    2987              : 
    2988              :      ATYPE a;  // NTYPE or narrower
    2989              :      WTYPE aw = (WTYPE) a;
    2990              :      WTYPE res = aw + b;
    2991              : 
    2992              :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2993              :    to truncate "b" and do the operation on NTYPE instead:
    2994              : 
    2995              :      NTYPE an = (NTYPE) a;
    2996              :      NTYPE bn = (NTYPE) b;  // truncation
    2997              :      NTYPE resn = an + bn;
    2998              :      WTYPE res = (WTYPE) resn;
    2999              : 
    3000              :    All users of "res" should then use "resn" instead, making the final
    3001              :    statement dead (not marked as relevant).  The final statement is still
    3002              :    needed to maintain the type correctness of the IR.
    3003              : 
    3004              :    vect_determine_precisions has already determined the minimum
    3005              :    precison of the operation and the minimum precision required
    3006              :    by users of the result.  */
    3007              : 
    3008              : static gimple *
    3009     30074104 : vect_recog_over_widening_pattern (vec_info *vinfo,
    3010              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3011              : {
    3012     30074104 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3013     20329422 :   if (!last_stmt)
    3014              :     return NULL;
    3015              : 
    3016              :   /* See whether we have found that this operation can be done on a
    3017              :      narrower type without changing its semantics.  */
    3018     20329422 :   unsigned int new_precision = last_stmt_info->operation_precision;
    3019     20329422 :   if (!new_precision)
    3020              :     return NULL;
    3021              : 
    3022      1474274 :   tree lhs = gimple_assign_lhs (last_stmt);
    3023      1474274 :   tree type = TREE_TYPE (lhs);
    3024      1474274 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3025              : 
    3026              :   /* Punt for reductions where we don't handle the type conversions.  */
    3027      1474274 :   if (vect_is_reduction (last_stmt_info))
    3028              :     return NULL;
    3029              : 
    3030              :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    3031              :      operands are interesting.  */
    3032      1466503 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    3033              : 
    3034              :   /* Check the operands.  */
    3035      1466503 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    3036      1466503 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    3037      1466503 :   unprom.quick_grow_cleared (nops);
    3038      1466503 :   unsigned int min_precision = 0;
    3039      1466503 :   bool single_use_p = false;
    3040      4382870 :   for (unsigned int i = 0; i < nops; ++i)
    3041              :     {
    3042      2917813 :       tree op = gimple_op (last_stmt, first_op + i);
    3043      2917813 :       if (TREE_CODE (op) == INTEGER_CST)
    3044      1331015 :         unprom[i].set_op (op, vect_constant_def);
    3045      1586798 :       else if (TREE_CODE (op) == SSA_NAME)
    3046              :         {
    3047      1586798 :           bool op_single_use_p = true;
    3048      1586798 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3049              :                                                      &op_single_use_p))
    3050         1446 :             return NULL;
    3051              :           /* If:
    3052              : 
    3053              :              (1) N bits of the result are needed;
    3054              :              (2) all inputs are widened from M<N bits; and
    3055              :              (3) one operand OP is a single-use SSA name
    3056              : 
    3057              :              we can shift the M->N widening from OP to the output
    3058              :              without changing the number or type of extensions involved.
    3059              :              This then reduces the number of copies of STMT_INFO.
    3060              : 
    3061              :              If instead of (3) more than one operand is a single-use SSA name,
    3062              :              shifting the extension to the output is even more of a win.
    3063              : 
    3064              :              If instead:
    3065              : 
    3066              :              (1) N bits of the result are needed;
    3067              :              (2) one operand OP2 is widened from M2<N bits;
    3068              :              (3) another operand OP1 is widened from M1<M2 bits; and
    3069              :              (4) both OP1 and OP2 are single-use
    3070              : 
    3071              :              the choice is between:
    3072              : 
    3073              :              (a) truncating OP2 to M1, doing the operation on M1,
    3074              :                  and then widening the result to N
    3075              : 
    3076              :              (b) widening OP1 to M2, doing the operation on M2, and then
    3077              :                  widening the result to N
    3078              : 
    3079              :              Both shift the M2->N widening of the inputs to the output.
    3080              :              (a) additionally shifts the M1->M2 widening to the output;
    3081              :              it requires fewer copies of STMT_INFO but requires an extra
    3082              :              M2->M1 truncation.
    3083              : 
    3084              :              Which is better will depend on the complexity and cost of
    3085              :              STMT_INFO, which is hard to predict at this stage.  However,
    3086              :              a clear tie-breaker in favor of (b) is the fact that the
    3087              :              truncation in (a) increases the length of the operation chain.
    3088              : 
    3089              :              If instead of (4) only one of OP1 or OP2 is single-use,
    3090              :              (b) is still a win over doing the operation in N bits:
    3091              :              it still shifts the M2->N widening on the single-use operand
    3092              :              to the output and reduces the number of STMT_INFO copies.
    3093              : 
    3094              :              If neither operand is single-use then operating on fewer than
    3095              :              N bits might lead to more extensions overall.  Whether it does
    3096              :              or not depends on global information about the vectorization
    3097              :              region, and whether that's a good trade-off would again
    3098              :              depend on the complexity and cost of the statements involved,
    3099              :              as well as things like register pressure that are not normally
    3100              :              modelled at this stage.  We therefore ignore these cases
    3101              :              and just optimize the clear single-use wins above.
    3102              : 
    3103              :              Thus we take the maximum precision of the unpromoted operands
    3104              :              and record whether any operand is single-use.  */
    3105      1585352 :           if (unprom[i].dt == vect_internal_def)
    3106              :             {
    3107       976530 :               min_precision = MAX (min_precision,
    3108              :                                    TYPE_PRECISION (unprom[i].type));
    3109       976530 :               single_use_p |= op_single_use_p;
    3110              :             }
    3111              :         }
    3112              :       else
    3113              :         return NULL;
    3114              :     }
    3115              : 
    3116              :   /* Although the operation could be done in operation_precision, we have
    3117              :      to balance that against introducing extra truncations or extensions.
    3118              :      Calculate the minimum precision that can be handled efficiently.
    3119              : 
    3120              :      The loop above determined that the operation could be handled
    3121              :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3122              :      extension from the inputs to the output without introducing more
    3123              :      instructions, and would reduce the number of instructions required
    3124              :      for STMT_INFO itself.
    3125              : 
    3126              :      vect_determine_precisions has also determined that the result only
    3127              :      needs min_output_precision bits.  Truncating by a factor of N times
    3128              :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3129              :      than min_output_precision, doing the operation in TYPE and truncating
    3130              :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3131              :      In contrast:
    3132              : 
    3133              :      - truncating the input to a unary operation and doing the operation
    3134              :        in the new type requires at most N - 1 + 1 = N instructions per
    3135              :        output vector
    3136              : 
    3137              :      - doing the same for a binary operation requires at most
    3138              :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3139              : 
    3140              :      Both unary and binary operations require fewer instructions than
    3141              :      this if the operands were extended from a suitable truncated form.
    3142              :      Thus there is usually nothing to lose by doing operations in
    3143              :      min_output_precision bits, but there can be something to gain.  */
    3144      1465057 :   if (!single_use_p)
    3145      1141511 :     min_precision = last_stmt_info->min_output_precision;
    3146              :   else
    3147       323546 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3148              : 
    3149              :   /* Apply the minimum efficient precision we just calculated.  */
    3150      1465057 :   if (new_precision < min_precision)
    3151              :     new_precision = min_precision;
    3152      1465057 :   new_precision = vect_element_precision (new_precision);
    3153      1465057 :   if (new_precision >= TYPE_PRECISION (type))
    3154              :     return NULL;
    3155              : 
    3156       143143 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3157              : 
    3158       143143 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3159       143143 :   if (!*type_out)
    3160              :     return NULL;
    3161              : 
    3162              :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3163       126281 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3164       126281 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3165              : 
    3166              :   /* If we're truncating an operation, we need to make sure that we
    3167              :      don't introduce new undefined overflow.  The codes tested here are
    3168              :      a subset of those accepted by vect_truncatable_operation_p.  */
    3169       126281 :   tree op_type = new_type;
    3170       126281 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3171       162724 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3172        25564 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3173              : 
    3174       126281 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3175       126281 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3176       126281 :   if (!new_vectype || !op_vectype)
    3177              :     return NULL;
    3178              : 
    3179              :   /* Verify we can handle the new operation.  For shifts and rotates
    3180              :      apply heuristic of whether we are likely facing vector-vector or
    3181              :      vector-scalar operation.  Since we are eventually expecting that
    3182              :      a later pattern might eventually want to rewrite an unsupported
    3183              :      into a supported case error on that side in case the original
    3184              :      operation was not supported either or this is a binary operation
    3185              :      and the 2nd operand is constant.  */
    3186       126281 :   if (code == RSHIFT_EXPR || code == LSHIFT_EXPR || code == RROTATE_EXPR)
    3187              :     {
    3188        27949 :       if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3189        26978 :           && ((unprom[1].dt != vect_external_def
    3190        26578 :                && unprom[1].dt != vect_constant_def)
    3191        18530 :               || !target_has_vecop_for_code (code, op_vectype, optab_scalar))
    3192        36426 :           && !(!target_has_vecop_for_code (code, *type_out, optab_vector)
    3193         7541 :                && ((unprom[1].dt != vect_external_def
    3194         7541 :                     || unprom[1].dt != vect_constant_def)
    3195              :                    || !target_has_vecop_for_code (code, *type_out,
    3196              :                                                   optab_scalar))))
    3197              :         return NULL;
    3198              :     }
    3199        98332 :   else if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
    3200        98332 :            && (target_has_vecop_for_code (code, *type_out, optab_vector)
    3201           15 :                && !(nops == 2 && unprom[1].dt == vect_constant_def)))
    3202              :     return NULL;
    3203              : 
    3204       125336 :   if (dump_enabled_p ())
    3205         4283 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3206              :                      type, new_type);
    3207              : 
    3208              :   /* Calculate the rhs operands for an operation on OP_TYPE.  */
    3209       125336 :   tree ops[3] = {};
    3210       125606 :   for (unsigned int i = 1; i < first_op; ++i)
    3211          270 :     ops[i - 1] = gimple_op (last_stmt, i);
    3212       125336 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3213       125336 :                        op_type, &unprom[0], op_vectype);
    3214              : 
    3215              :   /* Use the operation to produce a result of type OP_TYPE.  */
    3216       125336 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3217       125336 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3218              :                                               ops[0], ops[1], ops[2]);
    3219       125336 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3220              : 
    3221       125336 :   if (dump_enabled_p ())
    3222         4283 :     dump_printf_loc (MSG_NOTE, vect_location,
    3223              :                      "created pattern stmt: %G", pattern_stmt);
    3224              : 
    3225              :   /* Convert back to the original signedness, if OP_TYPE is different
    3226              :      from NEW_TYPE.  */
    3227       125336 :   if (op_type != new_type)
    3228        25558 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3229              :                                         pattern_stmt, op_vectype);
    3230              : 
    3231              :   /* Promote the result to the original type.  */
    3232       125336 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3233              :                                       pattern_stmt, new_vectype);
    3234              : 
    3235       125336 :   return pattern_stmt;
    3236      1466503 : }
    3237              : 
    3238              : /* Recognize the following patterns:
    3239              : 
    3240              :      ATYPE a;  // narrower than TYPE
    3241              :      BTYPE b;  // narrower than TYPE
    3242              : 
    3243              :    1) Multiply high with scaling
    3244              :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3245              :      Here, c is bitsize (TYPE) / 2 - 1.
    3246              : 
    3247              :    2) ... or also with rounding
    3248              :      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
    3249              :      Here, d is bitsize (TYPE) / 2 - 2.
    3250              : 
    3251              :    3) Normal multiply high
    3252              :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3253              :      Here, e is bitsize (TYPE) / 2.
    3254              : 
    3255              :    where only the bottom half of res is used.  */
    3256              : 
    3257              : static gimple *
    3258     30190600 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3259              :                           stmt_vec_info last_stmt_info, tree *type_out)
    3260              : {
    3261              :   /* Check for a right shift.  */
    3262     30190600 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3263     20445795 :   if (!last_stmt
    3264     20445795 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3265              :     return NULL;
    3266              : 
    3267              :   /* Check that the shift result is wider than the users of the
    3268              :      result need (i.e. that narrowing would be a natural choice).  */
    3269       355072 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3270       355072 :   unsigned int target_precision
    3271       355072 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3272       355072 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3273       355072 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3274              :     return NULL;
    3275              : 
    3276              :   /* Look through any change in sign on the outer shift input.  */
    3277        44390 :   vect_unpromoted_value unprom_rshift_input;
    3278        44390 :   tree rshift_input = vect_look_through_possible_promotion
    3279        44390 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3280        44390 :   if (!rshift_input
    3281        44390 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3282        43811 :            != TYPE_PRECISION (lhs_type))
    3283              :     return NULL;
    3284              : 
    3285              :   /* Get the definition of the shift input.  */
    3286        41708 :   stmt_vec_info rshift_input_stmt_info
    3287        41708 :     = vect_get_internal_def (vinfo, rshift_input);
    3288        41708 :   if (!rshift_input_stmt_info)
    3289              :     return NULL;
    3290        36657 :   gassign *rshift_input_stmt
    3291     30222917 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3292        32404 :   if (!rshift_input_stmt)
    3293              :     return NULL;
    3294              : 
    3295        32404 :   stmt_vec_info mulh_stmt_info;
    3296        32404 :   tree scale_term;
    3297        32404 :   bool rounding_p = false;
    3298              : 
    3299              :   /* Check for the presence of the rounding term.  */
    3300        39359 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3301              :     {
    3302              :       /* Check that the outer shift was by 1.  */
    3303        18684 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3304         9294 :         return NULL;
    3305              : 
    3306              :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3307         1260 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3308              :         return NULL;
    3309              : 
    3310              :       /* Look through any change in sign on the addition input.  */
    3311           88 :       vect_unpromoted_value unprom_plus_input;
    3312           88 :       tree plus_input = vect_look_through_possible_promotion
    3313           88 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3314           88 :       if (!plus_input
    3315           88 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3316           88 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3317              :         return NULL;
    3318              : 
    3319              :       /* Get the definition of the multiply-high-scale part.  */
    3320           88 :       stmt_vec_info plus_input_stmt_info
    3321           88 :         = vect_get_internal_def (vinfo, plus_input);
    3322           88 :       if (!plus_input_stmt_info)
    3323              :         return NULL;
    3324           88 :       gassign *plus_input_stmt
    3325         9382 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3326           88 :       if (!plus_input_stmt
    3327           88 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3328              :         return NULL;
    3329              : 
    3330              :       /* Look through any change in sign on the scaling input.  */
    3331           48 :       vect_unpromoted_value unprom_scale_input;
    3332           48 :       tree scale_input = vect_look_through_possible_promotion
    3333           48 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3334           48 :       if (!scale_input
    3335           48 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3336           48 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3337              :         return NULL;
    3338              : 
    3339              :       /* Get the definition of the multiply-high part.  */
    3340           48 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3341           48 :       if (!mulh_stmt_info)
    3342              :         return NULL;
    3343              : 
    3344              :       /* Get the scaling term.  */
    3345           48 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3346           48 :       rounding_p = true;
    3347              :     }
    3348              :   else
    3349              :     {
    3350        23062 :       mulh_stmt_info = rshift_input_stmt_info;
    3351        23062 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3352              :     }
    3353              : 
    3354              :   /* Check that the scaling factor is constant.  */
    3355        23110 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3356              :     return NULL;
    3357              : 
    3358              :   /* Check whether the scaling input term can be seen as two widened
    3359              :      inputs multiplied together.  */
    3360        66612 :   vect_unpromoted_value unprom_mult[2];
    3361        22204 :   tree new_type;
    3362        22204 :   unsigned int nops
    3363        22204 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3364              :                             false, 2, unprom_mult, &new_type);
    3365        22204 :   if (nops != 2)
    3366              :     return NULL;
    3367              : 
    3368              :   /* Adjust output precision.  */
    3369          988 :   if (TYPE_PRECISION (new_type) < target_precision)
    3370            0 :     new_type = build_nonstandard_integer_type
    3371            0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3372              : 
    3373          988 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3374          988 :   internal_fn ifn;
    3375              :   /* Check that the scaling factor is expected.  Instead of
    3376              :      target_precision, we should use the one that we actually
    3377              :      use for internal function.  */
    3378          988 :   if (rounding_p)
    3379              :     {
    3380              :       /* Check pattern 2).  */
    3381           96 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3382          144 :           != TYPE_PRECISION (lhs_type))
    3383              :         return NULL;
    3384              : 
    3385              :       ifn = IFN_MULHRS;
    3386              :     }
    3387              :   else
    3388              :     {
    3389              :       /* Check for pattern 1).  */
    3390         1880 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3391         2820 :           == TYPE_PRECISION (lhs_type))
    3392              :         ifn = IFN_MULHS;
    3393              :       /* Check for pattern 3).  */
    3394          906 :       else if (wi::to_widest (scale_term) + mult_precision
    3395         1812 :                == TYPE_PRECISION (lhs_type))
    3396              :         ifn = IFN_MULH;
    3397              :       else
    3398              :         return NULL;
    3399              :     }
    3400              : 
    3401          925 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3402              : 
    3403              :   /* Check for target support.  */
    3404          925 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3405          925 :   if (!new_vectype
    3406         1834 :       || !direct_internal_fn_supported_p
    3407          909 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3408          838 :     return NULL;
    3409              : 
    3410              :   /* The IR requires a valid vector type for the cast result, even though
    3411              :      it's likely to be discarded.  */
    3412           87 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3413           87 :   if (!*type_out)
    3414              :     return NULL;
    3415              : 
    3416              :   /* Generate the IFN_MULHRS call.  */
    3417           87 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3418           87 :   tree new_ops[2];
    3419           87 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3420              :                        unprom_mult, new_vectype);
    3421           87 :   gcall *mulhrs_stmt
    3422           87 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3423           87 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3424           87 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3425              : 
    3426           87 :   if (dump_enabled_p ())
    3427            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3428              :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3429              : 
    3430           87 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3431           87 :                               mulhrs_stmt, new_vectype);
    3432              : }
    3433              : 
    3434              : /* Recognize the patterns:
    3435              : 
    3436              :             ATYPE a;  // narrower than TYPE
    3437              :             BTYPE b;  // narrower than TYPE
    3438              :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3439              :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3440              : 
    3441              :    where only the bottom half of avg is used.  Try to transform them into:
    3442              : 
    3443              :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3444              :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3445              : 
    3446              :   followed by:
    3447              : 
    3448              :             TYPE avg = (TYPE) avg';
    3449              : 
    3450              :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3451              :   of avg is used, all or part of the cast of avg' should become redundant.
    3452              : 
    3453              :   If there is no target support available, generate code to distribute rshift
    3454              :   over plus and add a carry.  */
    3455              : 
    3456              : static gimple *
    3457     30188951 : vect_recog_average_pattern (vec_info *vinfo,
    3458              :                             stmt_vec_info last_stmt_info, tree *type_out)
    3459              : {
    3460              :   /* Check for a shift right by one bit.  */
    3461     30188951 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3462     20444269 :   if (!last_stmt
    3463     20444269 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3464       354943 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3465     30132511 :     return NULL;
    3466              : 
    3467              :   /* Check that the shift result is wider than the users of the
    3468              :      result need (i.e. that narrowing would be a natural choice).  */
    3469        56440 :   tree lhs = gimple_assign_lhs (last_stmt);
    3470        56440 :   tree type = TREE_TYPE (lhs);
    3471        56440 :   unsigned int target_precision
    3472        56440 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3473        56440 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3474              :     return NULL;
    3475              : 
    3476              :   /* Look through any change in sign on the shift input.  */
    3477         2117 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3478         2117 :   vect_unpromoted_value unprom_plus;
    3479         2117 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3480              :                                                      &unprom_plus);
    3481         2117 :   if (!rshift_rhs
    3482         2117 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3483              :     return NULL;
    3484              : 
    3485              :   /* Get the definition of the shift input.  */
    3486         2115 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3487         2115 :   if (!plus_stmt_info)
    3488              :     return NULL;
    3489              : 
    3490              :   /* Check whether the shift input can be seen as a tree of additions on
    3491              :      2 or 3 widened inputs.
    3492              : 
    3493              :      Note that the pattern should be a win even if the result of one or
    3494              :      more additions is reused elsewhere: if the pattern matches, we'd be
    3495              :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3496         8388 :   internal_fn ifn = IFN_AVG_FLOOR;
    3497         8388 :   vect_unpromoted_value unprom[3];
    3498         2097 :   tree new_type;
    3499         2097 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3500         2097 :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3501              :                                             unprom, &new_type);
    3502         2097 :   if (nops == 0)
    3503              :     return NULL;
    3504          869 :   if (nops == 3)
    3505              :     {
    3506              :       /* Check that one operand is 1.  */
    3507              :       unsigned int i;
    3508          873 :       for (i = 0; i < 3; ++i)
    3509          819 :         if (integer_onep (unprom[i].op))
    3510              :           break;
    3511          273 :       if (i == 3)
    3512              :         return NULL;
    3513              :       /* Throw away the 1 operand and keep the other two.  */
    3514          219 :       if (i < 2)
    3515            0 :         unprom[i] = unprom[2];
    3516              :       ifn = IFN_AVG_CEIL;
    3517              :     }
    3518              : 
    3519          815 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3520              : 
    3521              :   /* We know that:
    3522              : 
    3523              :      (a) the operation can be viewed as:
    3524              : 
    3525              :            TYPE widened0 = (TYPE) UNPROM[0];
    3526              :            TYPE widened1 = (TYPE) UNPROM[1];
    3527              :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3528              :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3529              : 
    3530              :      (b) the first two statements are equivalent to:
    3531              : 
    3532              :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3533              :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3534              : 
    3535              :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3536              :          where sensible;
    3537              : 
    3538              :      (d) all the operations can be performed correctly at twice the width of
    3539              :          NEW_TYPE, due to the nature of the average operation; and
    3540              : 
    3541              :      (e) users of the result of the right shift need only TARGET_PRECISION
    3542              :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3543              :          precision.
    3544              : 
    3545              :      Under these circumstances, the only situation in which NEW_TYPE
    3546              :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3547              :      and an addition result are all used more than once.  Thus we can
    3548              :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3549              :      as "free", whereas widening the result of the average instruction
    3550              :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3551              :      therefore better not to go narrower than TARGET_PRECISION.  */
    3552          815 :   if (TYPE_PRECISION (new_type) < target_precision)
    3553            0 :     new_type = build_nonstandard_integer_type (target_precision,
    3554            0 :                                                TYPE_UNSIGNED (new_type));
    3555              : 
    3556              :   /* Check for target support.  */
    3557          815 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3558          815 :   if (!new_vectype)
    3559              :     return NULL;
    3560              : 
    3561          815 :   bool fallback_p = false;
    3562              : 
    3563          815 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3564              :     ;
    3565          692 :   else if (TYPE_UNSIGNED (new_type)
    3566          256 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3567          256 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3568          256 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3569          948 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3570              :     fallback_p = true;
    3571              :   else
    3572          436 :     return NULL;
    3573              : 
    3574              :   /* The IR requires a valid vector type for the cast result, even though
    3575              :      it's likely to be discarded.  */
    3576          379 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3577          379 :   if (!*type_out)
    3578              :     return NULL;
    3579              : 
    3580          375 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3581          375 :   tree new_ops[2];
    3582          375 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3583              :                        unprom, new_vectype);
    3584              : 
    3585          375 :   if (fallback_p)
    3586              :     {
    3587              :       /* As a fallback, generate code for following sequence:
    3588              : 
    3589              :          shifted_op0 = new_ops[0] >> 1;
    3590              :          shifted_op1 = new_ops[1] >> 1;
    3591              :          sum_of_shifted = shifted_op0 + shifted_op1;
    3592              :          unmasked_carry = new_ops[0] and/or new_ops[1];
    3593              :          carry = unmasked_carry & 1;
    3594              :          new_var = sum_of_shifted + carry;
    3595              :       */
    3596              : 
    3597          252 :       tree one_cst = build_one_cst (new_type);
    3598          252 :       gassign *g;
    3599              : 
    3600          252 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3601          252 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3602          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3603              : 
    3604          252 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3605          252 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3606          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3607              : 
    3608          252 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3609          252 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3610              :                                shifted_op0, shifted_op1);
    3611          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3612              : 
    3613          252 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3614          252 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3615          252 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3616          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3617              : 
    3618          252 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3619          252 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3620          252 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3621              : 
    3622          252 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3623          252 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3624              :     }
    3625              : 
    3626              :   /* Generate the IFN_AVG* call.  */
    3627          123 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3628              :                                                     new_ops[1]);
    3629          123 :   gimple_call_set_lhs (average_stmt, new_var);
    3630          123 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3631              : 
    3632          123 :   if (dump_enabled_p ())
    3633           31 :     dump_printf_loc (MSG_NOTE, vect_location,
    3634              :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3635              : 
    3636          123 :   return vect_convert_output (vinfo, last_stmt_info,
    3637          123 :                               type, average_stmt, new_vectype);
    3638              : }
    3639              : 
    3640              : /* Recognize cases in which the input to a cast is wider than its
    3641              :    output, and the input is fed by a widening operation.  Fold this
    3642              :    by removing the unnecessary intermediate widening.  E.g.:
    3643              : 
    3644              :      unsigned char a;
    3645              :      unsigned int b = (unsigned int) a;
    3646              :      unsigned short c = (unsigned short) b;
    3647              : 
    3648              :    -->
    3649              : 
    3650              :      unsigned short c = (unsigned short) a;
    3651              : 
    3652              :    Although this is rare in input IR, it is an expected side-effect
    3653              :    of the over-widening pattern above.
    3654              : 
    3655              :    This is beneficial also for integer-to-float conversions, if the
    3656              :    widened integer has more bits than the float, and if the unwidened
    3657              :    input doesn't.  */
    3658              : 
    3659              : static gimple *
    3660     30190600 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3661              :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3662              : {
    3663              :   /* Check for a cast, including an integer-to-float conversion.  */
    3664     50591228 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3665     20445708 :   if (!last_stmt)
    3666              :     return NULL;
    3667     20445708 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3668     20445708 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3669              :     return NULL;
    3670              : 
    3671              :   /* Make sure that the rhs is a scalar with a natural bitsize.  */
    3672      2903095 :   tree lhs = gimple_assign_lhs (last_stmt);
    3673      2903095 :   if (!lhs)
    3674              :     return NULL;
    3675      2903095 :   tree lhs_type = TREE_TYPE (lhs);
    3676      2903095 :   scalar_mode lhs_mode;
    3677      2883442 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3678      5784801 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3679        25193 :     return NULL;
    3680              : 
    3681              :   /* Check for a narrowing operation (from a vector point of view).  */
    3682      2877902 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3683      2877902 :   tree rhs_type = TREE_TYPE (rhs);
    3684      2877902 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3685      2576264 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3686      7867798 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3687              :     return NULL;
    3688              : 
    3689              :   /* Try to find an unpromoted input.  */
    3690       332605 :   vect_unpromoted_value unprom;
    3691       332605 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3692       332605 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3693              :     return NULL;
    3694              : 
    3695              :   /* If the bits above RHS_TYPE matter, make sure that they're the
    3696              :      same when extending from UNPROM as they are when extending from RHS.  */
    3697        45210 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3698        45210 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3699              :     return NULL;
    3700              : 
    3701              :   /* We can get the same result by casting UNPROM directly, to avoid
    3702              :      the unnecessary widening and narrowing.  */
    3703        45080 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3704              : 
    3705        45080 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3706        45080 :   if (!*type_out)
    3707              :     return NULL;
    3708              : 
    3709        45080 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3710        45080 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3711        45080 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3712              : 
    3713        45080 :   return pattern_stmt;
    3714              : }
    3715              : 
    3716              : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3717              :    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    3718              : 
    3719              : static gimple *
    3720     30120638 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3721              :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3722              : {
    3723     30120638 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3724     30120638 :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3725     30120638 :                                       "vect_recog_widen_shift_pattern");
    3726              : }
    3727              : 
    3728              : /* Detect a rotate pattern wouldn't be otherwise vectorized:
    3729              : 
    3730              :    type a_t, b_t, c_t;
    3731              : 
    3732              :    S0 a_t = b_t r<< c_t;
    3733              : 
    3734              :   Input/Output:
    3735              : 
    3736              :   * STMT_VINFO: The stmt from which the pattern search begins,
    3737              :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3738              :     with a sequence:
    3739              : 
    3740              :    S1 d_t = -c_t;
    3741              :    S2 e_t = d_t & (B - 1);
    3742              :    S3 f_t = b_t << c_t;
    3743              :    S4 g_t = b_t >> e_t;
    3744              :    S0 a_t = f_t | g_t;
    3745              : 
    3746              :     where B is element bitsize of type.
    3747              : 
    3748              :   Output:
    3749              : 
    3750              :   * TYPE_OUT: The type of the output of this pattern.
    3751              : 
    3752              :   * Return value: A new stmt that will be used to replace the rotate
    3753              :     S0 stmt.  */
    3754              : 
    3755              : static gimple *
    3756     30120638 : vect_recog_rotate_pattern (vec_info *vinfo,
    3757              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3758              : {
    3759     30120638 :   gimple *last_stmt = stmt_vinfo->stmt;
    3760     30120638 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3761     30120638 :   gimple *pattern_stmt, *def_stmt;
    3762     30120638 :   enum tree_code rhs_code;
    3763     30120638 :   enum vect_def_type dt;
    3764     30120638 :   optab optab1, optab2;
    3765     30120638 :   edge ext_def = NULL;
    3766     30120638 :   bool bswap16_p = false;
    3767              : 
    3768     30120638 :   if (is_gimple_assign (last_stmt))
    3769              :     {
    3770     20375726 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3771     20375726 :       switch (rhs_code)
    3772              :         {
    3773         6675 :         case LROTATE_EXPR:
    3774         6675 :         case RROTATE_EXPR:
    3775         6675 :           break;
    3776              :         default:
    3777              :           return NULL;
    3778              :         }
    3779              : 
    3780         6675 :       lhs = gimple_assign_lhs (last_stmt);
    3781         6675 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3782         6675 :       type = TREE_TYPE (oprnd0);
    3783         6675 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3784              :     }
    3785      9744912 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3786              :     {
    3787              :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3788              :          The vectorizer has bswap support, but only if the argument isn't
    3789              :          promoted.  */
    3790          170 :       lhs = gimple_call_lhs (last_stmt);
    3791          170 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3792          170 :       type = TREE_TYPE (oprnd0);
    3793          170 :       if (!lhs
    3794          170 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3795          170 :           || TYPE_PRECISION (type) <= 16
    3796            0 :           || TREE_CODE (oprnd0) != SSA_NAME
    3797          170 :           || BITS_PER_UNIT != 8)
    3798          170 :         return NULL;
    3799              : 
    3800            0 :       stmt_vec_info def_stmt_info;
    3801            0 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3802              :         return NULL;
    3803              : 
    3804            0 :       if (dt != vect_internal_def)
    3805              :         return NULL;
    3806              : 
    3807            0 :       if (gimple_assign_cast_p (def_stmt))
    3808              :         {
    3809            0 :           def = gimple_assign_rhs1 (def_stmt);
    3810            0 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3811            0 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3812              :             oprnd0 = def;
    3813              :         }
    3814              : 
    3815            0 :       type = TREE_TYPE (lhs);
    3816            0 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3817            0 :       if (vectype == NULL_TREE)
    3818              :         return NULL;
    3819              : 
    3820            0 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3821              :         {
    3822              :           /* The encoding uses one stepped pattern for each byte in the
    3823              :              16-bit word.  */
    3824            0 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3825            0 :           for (unsigned i = 0; i < 3; ++i)
    3826            0 :             for (unsigned j = 0; j < 2; ++j)
    3827            0 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3828              : 
    3829            0 :           vec_perm_indices indices (elts, 1,
    3830            0 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3831            0 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3832            0 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3833              :             {
    3834              :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3835              :                  undo the argument promotion.  */
    3836            0 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3837              :                 {
    3838            0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3839            0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3840            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3841            0 :                   oprnd0 = def;
    3842              :                 }
    3843              : 
    3844              :               /* Pattern detected.  */
    3845            0 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3846              : 
    3847            0 :               *type_out = vectype;
    3848              : 
    3849              :               /* Pattern supported.  Create a stmt to be used to replace the
    3850              :                  pattern, with the unpromoted argument.  */
    3851            0 :               var = vect_recog_temp_ssa_var (type, NULL);
    3852            0 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3853              :                                                 1, oprnd0);
    3854            0 :               gimple_call_set_lhs (pattern_stmt, var);
    3855            0 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3856              :                                       gimple_call_fntype (last_stmt));
    3857            0 :               return pattern_stmt;
    3858              :             }
    3859            0 :         }
    3860              : 
    3861            0 :       oprnd1 = build_int_cst (integer_type_node, 8);
    3862            0 :       rhs_code = LROTATE_EXPR;
    3863            0 :       bswap16_p = true;
    3864              :     }
    3865              :   else
    3866              :     return NULL;
    3867              : 
    3868         6675 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3869         6555 :       || !INTEGRAL_TYPE_P (type)
    3870        12869 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3871              :     return NULL;
    3872              : 
    3873         6194 :   stmt_vec_info def_stmt_info;
    3874         6194 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3875              :     return NULL;
    3876              : 
    3877         6194 :   if (dt != vect_internal_def
    3878         5988 :       && dt != vect_constant_def
    3879           21 :       && dt != vect_external_def)
    3880              :     return NULL;
    3881              : 
    3882         6188 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3883         6188 :   if (vectype == NULL_TREE)
    3884              :     return NULL;
    3885              : 
    3886              :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3887              :      don't do anything here.  */
    3888         5961 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3889         5961 :   if (optab1
    3890         5961 :       && can_implement_p (optab1, TYPE_MODE (vectype)))
    3891              :     {
    3892          354 :      use_rotate:
    3893          354 :       if (bswap16_p)
    3894              :         {
    3895            0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3896              :             {
    3897            0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3898            0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3899            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3900            0 :               oprnd0 = def;
    3901              :             }
    3902              : 
    3903              :           /* Pattern detected.  */
    3904            0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3905              : 
    3906            0 :           *type_out = vectype;
    3907              : 
    3908              :           /* Pattern supported.  Create a stmt to be used to replace the
    3909              :              pattern.  */
    3910            0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3911            0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3912              :                                               oprnd1);
    3913            0 :           return pattern_stmt;
    3914              :         }
    3915              :       return NULL;
    3916              :     }
    3917              : 
    3918         5925 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3919              :     {
    3920         5877 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3921         5877 :       if (optab2
    3922         5877 :           && can_implement_p (optab2, TYPE_MODE (vectype)))
    3923          318 :         goto use_rotate;
    3924              :     }
    3925              : 
    3926              :   /* We may not use a reduction operand twice.  */
    3927         5607 :   if (vect_is_reduction (stmt_vinfo))
    3928              :     return NULL;
    3929              : 
    3930         5586 :   tree utype = unsigned_type_for (type);
    3931         5586 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3932         5586 :   if (!uvectype)
    3933              :     return NULL;
    3934              : 
    3935              :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3936              :      don't do anything here either.  */
    3937         5586 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3938         5586 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3939         5586 :   if (!optab1
    3940         5586 :       || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3941          599 :       || !optab2
    3942         6185 :       || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3943              :     {
    3944         4987 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3945              :         return NULL;
    3946         4952 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3947         4952 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3948         4952 :       if (!optab1
    3949         4952 :           || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3950         3745 :           || !optab2
    3951         8697 :           || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3952         1207 :         return NULL;
    3953              :     }
    3954              : 
    3955         4344 :   *type_out = vectype;
    3956              : 
    3957         4344 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
    3958              :     {
    3959           47 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3960           47 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3961           47 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3962           47 :       oprnd0 = def;
    3963              :     }
    3964              : 
    3965         4344 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3966           13 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3967              : 
    3968         4344 :   def = NULL_TREE;
    3969         4344 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3970         4344 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3971              :     def = oprnd1;
    3972           28 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3973              :     {
    3974            0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3975            0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3976            0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3977            0 :              == TYPE_PRECISION (type))
    3978              :         def = rhs1;
    3979              :     }
    3980              : 
    3981         4316 :   if (def == NULL_TREE)
    3982              :     {
    3983           28 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3984           28 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3985           28 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3986              :     }
    3987         4344 :   stype = TREE_TYPE (def);
    3988              : 
    3989         4344 :   if (TREE_CODE (def) == INTEGER_CST)
    3990              :     {
    3991         4232 :       if (!tree_fits_uhwi_p (def)
    3992         4232 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3993         8464 :           || integer_zerop (def))
    3994            0 :         return NULL;
    3995         4232 :       def2 = build_int_cst (stype,
    3996         4232 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3997              :     }
    3998              :   else
    3999              :     {
    4000          112 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    4001              : 
    4002          112 :       if (vecstype == NULL_TREE)
    4003              :         return NULL;
    4004          112 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4005          112 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    4006          112 :       if (ext_def)
    4007              :         {
    4008           13 :           basic_block new_bb
    4009           13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4010           13 :           gcc_assert (!new_bb);
    4011              :         }
    4012              :       else
    4013           99 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4014              : 
    4015          112 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    4016          112 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    4017          112 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    4018              :                                       gimple_assign_lhs (def_stmt), mask);
    4019          112 :       if (ext_def)
    4020              :         {
    4021           13 :           basic_block new_bb
    4022           13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    4023           13 :           gcc_assert (!new_bb);
    4024              :         }
    4025              :       else
    4026           99 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4027              :     }
    4028              : 
    4029         4344 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    4030         8603 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    4031              :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    4032              :                                   oprnd0, def);
    4033         4344 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4034              : 
    4035         4344 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    4036         8603 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    4037              :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    4038              :                                   oprnd0, def2);
    4039         4344 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    4040              : 
    4041              :   /* Pattern detected.  */
    4042         4344 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    4043              : 
    4044              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4045         4344 :   var = vect_recog_temp_ssa_var (utype, NULL);
    4046         4344 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    4047              : 
    4048         4344 :   if (!useless_type_conversion_p (type, utype))
    4049              :     {
    4050           47 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    4051           47 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    4052           47 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    4053              :     }
    4054              :   return pattern_stmt;
    4055              : }
    4056              : 
    4057              : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    4058              :    vectorized:
    4059              : 
    4060              :    type a_t;
    4061              :    TYPE b_T, res_T;
    4062              : 
    4063              :    S1 a_t = ;
    4064              :    S2 b_T = ;
    4065              :    S3 res_T = b_T op a_t;
    4066              : 
    4067              :   where type 'TYPE' is a type with different size than 'type',
    4068              :   and op is <<, >> or rotate.
    4069              : 
    4070              :   Also detect cases:
    4071              : 
    4072              :    type a_t;
    4073              :    TYPE b_T, c_T, res_T;
    4074              : 
    4075              :    S0 c_T = ;
    4076              :    S1 a_t = (type) c_T;
    4077              :    S2 b_T = ;
    4078              :    S3 res_T = b_T op a_t;
    4079              : 
    4080              :   Input/Output:
    4081              : 
    4082              :   * STMT_VINFO: The stmt from which the pattern search begins,
    4083              :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4084              :     with a shift/rotate which has same type on both operands, in the
    4085              :     second case just b_T op c_T, in the first case with added cast
    4086              :     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
    4087              : 
    4088              :   Output:
    4089              : 
    4090              :   * TYPE_OUT: The type of the output of this pattern.
    4091              : 
    4092              :   * Return value: A new stmt that will be used to replace the shift/rotate
    4093              :     S3 stmt.  */
    4094              : 
    4095              : static gimple *
    4096     30125302 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4097              :                                         stmt_vec_info stmt_vinfo,
    4098              :                                         tree *type_out)
    4099              : {
    4100     30125302 :   gimple *last_stmt = stmt_vinfo->stmt;
    4101     30125302 :   tree oprnd0, oprnd1, lhs, var;
    4102     30125302 :   gimple *pattern_stmt;
    4103     30125302 :   enum tree_code rhs_code;
    4104              : 
    4105     30125302 :   if (!is_gimple_assign (last_stmt))
    4106              :     return NULL;
    4107              : 
    4108     20380390 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4109     20380390 :   switch (rhs_code)
    4110              :     {
    4111       492837 :     case LSHIFT_EXPR:
    4112       492837 :     case RSHIFT_EXPR:
    4113       492837 :     case LROTATE_EXPR:
    4114       492837 :     case RROTATE_EXPR:
    4115       492837 :       break;
    4116              :     default:
    4117              :       return NULL;
    4118              :     }
    4119              : 
    4120       492837 :   lhs = gimple_assign_lhs (last_stmt);
    4121       492837 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4122       492837 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4123       492837 :   if (TREE_CODE (oprnd1) != SSA_NAME
    4124       100583 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4125        44546 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4126        44259 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4127       537096 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4128        44259 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4129       448578 :     return NULL;
    4130              : 
    4131        44259 :   stmt_vec_info def_vinfo = vinfo->lookup_def (oprnd1);
    4132        44259 :   if (!def_vinfo || STMT_VINFO_DEF_TYPE (def_vinfo) == vect_external_def)
    4133              :     return NULL;
    4134              : 
    4135        41380 :   def_vinfo = vect_stmt_to_vectorize (def_vinfo);
    4136         1066 :   gcc_assert (def_vinfo);
    4137              : 
    4138        41380 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4139        41380 :   if (*type_out == NULL_TREE)
    4140              :     return NULL;
    4141              : 
    4142        29733 :   tree def = NULL_TREE;
    4143        29733 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4144        19125 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4145              :     {
    4146         5125 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4147         5125 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4148         5125 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4149         1098 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4150              :         {
    4151         1098 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4152         1098 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4153              :             def = rhs1;
    4154              :           else
    4155              :             {
    4156         1011 :               tree mask
    4157         1011 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4158         1011 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4159         1011 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4160         1011 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4161         1011 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4162         1011 :                                                            TREE_TYPE (rhs1));
    4163         1011 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4164              :             }
    4165              :         }
    4166              :     }
    4167              : 
    4168         1098 :   if (def == NULL_TREE)
    4169              :     {
    4170        28635 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4171        28635 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4172        28635 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4173              :     }
    4174              : 
    4175              :   /* Pattern detected.  */
    4176        29733 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4177              : 
    4178              :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4179        29733 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4180        29733 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4181              : 
    4182        29733 :   return pattern_stmt;
    4183              : }
    4184              : 
    4185              : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4186              :    needed by the multiplication-by-immediate synthesis algorithm described by
    4187              :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    4188              :    present.  Return true iff the target supports all the steps.  */
    4189              : 
    4190              : static bool
    4191       278259 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4192              :                                  tree vectype, bool synth_shift_p)
    4193              : {
    4194       278259 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4195              :     return false;
    4196              : 
    4197       278259 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4198       278259 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4199              : 
    4200       278259 :   if (var == negate_variant
    4201       278259 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4202              :     return false;
    4203              : 
    4204              :   /* If we must synthesize shifts with additions make sure that vector
    4205              :      addition is available.  */
    4206       277764 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4207              :     return false;
    4208              : 
    4209       136668 :   for (int i = 1; i < alg->ops; i++)
    4210              :     {
    4211       103592 :       switch (alg->op[i])
    4212              :         {
    4213              :         case alg_shift:
    4214              :           break;
    4215        25873 :         case alg_add_t_m2:
    4216        25873 :         case alg_add_t2_m:
    4217        25873 :         case alg_add_factor:
    4218        25873 :           if (!supports_vplus)
    4219              :             return false;
    4220              :           break;
    4221        16438 :         case alg_sub_t_m2:
    4222        16438 :         case alg_sub_t2_m:
    4223        16438 :         case alg_sub_factor:
    4224        16438 :           if (!supports_vminus)
    4225              :             return false;
    4226              :           break;
    4227              :         case alg_unknown:
    4228              :         case alg_m:
    4229              :         case alg_zero:
    4230              :         case alg_impossible:
    4231              :           return false;
    4232            0 :         default:
    4233            0 :           gcc_unreachable ();
    4234              :         }
    4235              :     }
    4236              : 
    4237              :   return true;
    4238              : }
    4239              : 
    4240              : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
    4241              :    putting the final result in DEST.  Append all statements but the last into
    4242              :    VINFO.  Return the last statement.  */
    4243              : 
    4244              : static gimple *
    4245            0 : synth_lshift_by_additions (vec_info *vinfo,
    4246              :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4247              :                            stmt_vec_info stmt_info)
    4248              : {
    4249            0 :   HOST_WIDE_INT i;
    4250            0 :   tree itype = TREE_TYPE (op);
    4251            0 :   tree prev_res = op;
    4252            0 :   gcc_assert (amnt >= 0);
    4253            0 :   for (i = 0; i < amnt; i++)
    4254              :     {
    4255            0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4256              :                       : dest;
    4257            0 :       gimple *stmt
    4258            0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4259            0 :       prev_res = tmp_var;
    4260            0 :       if (i < amnt - 1)
    4261            0 :         append_pattern_def_seq (vinfo, stmt_info, stmt);
    4262              :       else
    4263            0 :         return stmt;
    4264              :     }
    4265            0 :   gcc_unreachable ();
    4266              :   return NULL;
    4267              : }
    4268              : 
    4269              : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    4270              :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    4271              :    the process if necessary.  Append the resulting assignment statements
    4272              :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    4273              :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    4274              :    left shifts using additions.  */
    4275              : 
    4276              : static tree
    4277        42214 : apply_binop_and_append_stmt (vec_info *vinfo,
    4278              :                              tree_code code, tree op1, tree op2,
    4279              :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    4280              : {
    4281        42214 :   if (integer_zerop (op2)
    4282        42214 :       && (code == LSHIFT_EXPR
    4283        36654 :           || code == PLUS_EXPR))
    4284              :     {
    4285        36654 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4286              :       return op1;
    4287              :     }
    4288              : 
    4289         5560 :   gimple *stmt;
    4290         5560 :   tree itype = TREE_TYPE (op1);
    4291         5560 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4292              : 
    4293         5560 :   if (code == LSHIFT_EXPR
    4294         5560 :       && synth_shift_p)
    4295              :     {
    4296            0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4297            0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
    4298            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4299            0 :       return tmp_var;
    4300              :     }
    4301              : 
    4302         5560 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4303         5560 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4304         5560 :   return tmp_var;
    4305              : }
    4306              : 
    4307              : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4308              :    and simple arithmetic operations to be vectorized.  Record the statements
    4309              :    produced in STMT_VINFO and return the last statement in the sequence or
    4310              :    NULL if it's not possible to synthesize such a multiplication.
    4311              :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4312              :    works on tree-ssa form.  */
    4313              : 
    4314              : static gimple *
    4315       280963 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4316              :                              stmt_vec_info stmt_vinfo)
    4317              : {
    4318       280963 :   tree itype = TREE_TYPE (op);
    4319       280963 :   machine_mode mode = TYPE_MODE (itype);
    4320       280963 :   struct algorithm alg;
    4321       280963 :   mult_variant variant;
    4322       280963 :   if (!tree_fits_shwi_p (val))
    4323              :     return NULL;
    4324              : 
    4325              :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4326              :      signed overflow where the original operation didn't.  Perform the
    4327              :      operations on an unsigned type and cast back to avoid this.
    4328              :      In the future we may want to relax this for synthesis algorithms
    4329              :      that we can prove do not cause unexpected overflow.  */
    4330       278264 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4331              : 
    4332        53058 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4333       278264 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4334       278264 :   if (!vectype)
    4335              :     return NULL;
    4336              : 
    4337              :   /* Targets that don't support vector shifts but support vector additions
    4338              :      can synthesize shifts that way.  */
    4339       278264 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4340              : 
    4341       278264 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4342              :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4343              :      The vectorizer's benefit analysis will decide whether it's beneficial
    4344              :      to do this.  */
    4345       556528 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4346       278264 :                                        ? TYPE_MODE (vectype) : mode,
    4347              :                                        hwval, &alg, &variant, MAX_COST);
    4348       278264 :   if (!possible)
    4349              :     return NULL;
    4350              : 
    4351       278264 :   if (vect_is_reduction (stmt_vinfo))
    4352              :     {
    4353           16 :       int op_uses = alg.op[0] != alg_zero;
    4354           35 :       for (int i = 1; i < alg.ops; i++)
    4355           22 :         switch (alg.op[i])
    4356              :           {
    4357            4 :           case alg_add_t_m2:
    4358            4 :           case alg_sub_t_m2:
    4359            4 :             if (synth_shift_p && alg.log[i])
    4360              :               return NULL;
    4361              :             else
    4362            4 :               op_uses++;
    4363            4 :             break;
    4364            0 :           case alg_add_t2_m:
    4365            0 :           case alg_sub_t2_m:
    4366            0 :             op_uses++;
    4367              :             /* Fallthru.  */
    4368           18 :           case alg_shift:
    4369           18 :             if (synth_shift_p && alg.log[i])
    4370              :               return NULL;
    4371              :             break;
    4372              :           case alg_add_factor:
    4373              :           case alg_sub_factor:
    4374              :             return NULL;
    4375              :           default:
    4376              :             break;
    4377              :           }
    4378           13 :       if (variant == add_variant)
    4379            0 :         op_uses++;
    4380              :       /* When we'll synthesize more than a single use of the reduction
    4381              :          operand the reduction constraints are violated.  Avoid this
    4382              :          situation.  */
    4383           13 :       if (op_uses > 1)
    4384              :         return NULL;
    4385              :     }
    4386              : 
    4387       278259 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4388              :     return NULL;
    4389              : 
    4390        33076 :   tree accumulator;
    4391              : 
    4392              :   /* Clear out the sequence of statements so we can populate it below.  */
    4393        33076 :   gimple *stmt = NULL;
    4394              : 
    4395        33076 :   if (cast_to_unsigned_p)
    4396              :     {
    4397        11085 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4398        11085 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4399        11085 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4400        11085 :       op = tmp_op;
    4401              :     }
    4402              : 
    4403        33076 :   if (alg.op[0] == alg_zero)
    4404          205 :     accumulator = build_int_cst (multtype, 0);
    4405              :   else
    4406              :     accumulator = op;
    4407              : 
    4408        33076 :   bool needs_fixup = (variant == negate_variant)
    4409        33076 :                       || (variant == add_variant);
    4410              : 
    4411       136507 :   for (int i = 1; i < alg.ops; i++)
    4412              :     {
    4413       103431 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4414       103431 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4415       103431 :       tree tmp_var = NULL_TREE;
    4416              : 
    4417       103431 :       switch (alg.op[i])
    4418              :         {
    4419        61217 :         case alg_shift:
    4420        61217 :           if (synth_shift_p)
    4421            0 :             stmt
    4422            0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4423            0 :                                            alg.log[i], stmt_vinfo);
    4424              :           else
    4425        61217 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4426              :                                          shft_log);
    4427              :           break;
    4428        21156 :         case alg_add_t_m2:
    4429        21156 :           tmp_var
    4430        21156 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4431              :                                            stmt_vinfo, synth_shift_p);
    4432        21156 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4433              :                                        tmp_var);
    4434        21156 :           break;
    4435        15667 :         case alg_sub_t_m2:
    4436        15667 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4437              :                                                  shft_log, stmt_vinfo,
    4438              :                                                  synth_shift_p);
    4439              :           /* In some algorithms the first step involves zeroing the
    4440              :              accumulator.  If subtracting from such an accumulator
    4441              :              just emit the negation directly.  */
    4442        15667 :           if (integer_zerop (accumulator))
    4443          205 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4444              :           else
    4445        15462 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4446              :                                         tmp_var);
    4447              :           break;
    4448            0 :         case alg_add_t2_m:
    4449            0 :           tmp_var
    4450            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4451              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4452            0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4453            0 :           break;
    4454            0 :         case alg_sub_t2_m:
    4455            0 :           tmp_var
    4456            0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4457              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4458            0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4459            0 :           break;
    4460         4655 :         case alg_add_factor:
    4461         4655 :           tmp_var
    4462         4655 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4463              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4464         4655 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4465              :                                        tmp_var);
    4466         4655 :           break;
    4467          736 :         case alg_sub_factor:
    4468          736 :           tmp_var
    4469          736 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4470              :                                            shft_log, stmt_vinfo, synth_shift_p);
    4471          736 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4472              :                                       accumulator);
    4473          736 :           break;
    4474            0 :         default:
    4475            0 :           gcc_unreachable ();
    4476              :         }
    4477              :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4478              :          but rather return it directly.  */
    4479              : 
    4480       103431 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4481        81705 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4482       103431 :       accumulator = accum_tmp;
    4483              :     }
    4484        33076 :   if (variant == negate_variant)
    4485              :     {
    4486          382 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4487          382 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4488          382 :       accumulator = accum_tmp;
    4489          382 :       if (cast_to_unsigned_p)
    4490          127 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4491              :     }
    4492        32694 :   else if (variant == add_variant)
    4493              :     {
    4494           74 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4495           74 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4496           74 :       accumulator = accum_tmp;
    4497           74 :       if (cast_to_unsigned_p)
    4498           64 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4499              :     }
    4500              :   /* Move back to a signed if needed.  */
    4501        32811 :   if (cast_to_unsigned_p)
    4502              :     {
    4503        11085 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4504        11085 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4505              :     }
    4506              : 
    4507              :   return stmt;
    4508              : }
    4509              : 
    4510              : /* Detect multiplication by constant and convert it into a sequence of
    4511              :    shifts and additions, subtractions, negations.  We reuse the
    4512              :    choose_mult_variant algorithms from expmed.cc
    4513              : 
    4514              :    Input/Output:
    4515              : 
    4516              :    STMT_VINFO: The stmt from which the pattern search begins,
    4517              :    i.e. the mult stmt.
    4518              : 
    4519              :  Output:
    4520              : 
    4521              :   * TYPE_OUT: The type of the output of this pattern.
    4522              : 
    4523              :   * Return value: A new stmt that will be used to replace
    4524              :     the multiplication.  */
    4525              : 
    4526              : static gimple *
    4527     30309333 : vect_recog_mult_pattern (vec_info *vinfo,
    4528              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4529              : {
    4530     30309333 :   gimple *last_stmt = stmt_vinfo->stmt;
    4531     30309333 :   tree oprnd0, oprnd1, vectype, itype;
    4532     30309333 :   gimple *pattern_stmt;
    4533              : 
    4534     30309333 :   if (!is_gimple_assign (last_stmt))
    4535              :     return NULL;
    4536              : 
    4537     20564421 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4538              :     return NULL;
    4539              : 
    4540      1378356 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4541      1378356 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4542      1378356 :   itype = TREE_TYPE (oprnd0);
    4543              : 
    4544      1378356 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4545      1378293 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4546       862897 :       || !INTEGRAL_TYPE_P (itype)
    4547      2241253 :       || !type_has_mode_precision_p (itype))
    4548       515511 :     return NULL;
    4549              : 
    4550       862845 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4551       862845 :   if (vectype == NULL_TREE)
    4552              :     return NULL;
    4553              : 
    4554              :   /* If the target can handle vectorized multiplication natively,
    4555              :      don't attempt to optimize this.  */
    4556       704543 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4557       704543 :   if (mul_optab != unknown_optab
    4558       704543 :       && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    4559              :     return NULL;
    4560              : 
    4561       280963 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4562              :                                               oprnd0, oprnd1, stmt_vinfo);
    4563       280963 :   if (!pattern_stmt)
    4564              :     return NULL;
    4565              : 
    4566              :   /* Pattern detected.  */
    4567        33076 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4568              : 
    4569        33076 :   *type_out = vectype;
    4570              : 
    4571        33076 :   return pattern_stmt;
    4572              : }
    4573              : 
    4574              : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
    4575              : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
    4576              : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4577              : 
    4578              : extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
    4579              : 
    4580              : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
    4581              : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
    4582              : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4583              : 
    4584              : static gimple *
    4585          262 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
    4586              :                                      internal_fn fn, tree *type_out,
    4587              :                                      tree lhs, tree op_0, tree op_1)
    4588              : {
    4589          262 :   tree itype = TREE_TYPE (op_0);
    4590          262 :   tree otype = TREE_TYPE (lhs);
    4591          262 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4592          262 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4593              : 
    4594          262 :   if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4595          262 :     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    4596              :     {
    4597           59 :       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
    4598           59 :       tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
    4599              : 
    4600           59 :       gimple_call_set_lhs (call, in_ssa);
    4601           59 :       gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4602           59 :       gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
    4603              : 
    4604           59 :       *type_out = v_otype;
    4605              : 
    4606           59 :       if (types_compatible_p (itype, otype))
    4607              :         return call;
    4608              :       else
    4609              :         {
    4610            0 :           append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
    4611            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4612              : 
    4613            0 :           return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
    4614              :         }
    4615              :     }
    4616              : 
    4617              :   return NULL;
    4618              : }
    4619              : 
    4620              : /*
    4621              :  * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
    4622              :  *   _7 = _4 + _6;
    4623              :  *   _8 = _4 > _7;
    4624              :  *   _9 = (long unsigned int) _8;
    4625              :  *   _10 = -_9;
    4626              :  *   _12 = _7 | _10;
    4627              :  *
    4628              :  * And then simplied to
    4629              :  *   _12 = .SAT_ADD (_4, _6);
    4630              :  */
    4631              : 
    4632              : static gimple *
    4633     30378428 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4634              :                             tree *type_out)
    4635              : {
    4636     30378428 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4637              : 
    4638     30378428 :   if (!is_gimple_assign (last_stmt))
    4639              :     return NULL;
    4640              : 
    4641     20633516 :   tree ops[2];
    4642     20633516 :   tree lhs = gimple_assign_lhs (last_stmt);
    4643              : 
    4644     20633516 :   if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
    4645     20633516 :       || gimple_signed_integer_sat_add (lhs, ops, NULL))
    4646              :     {
    4647           46 :       if (TREE_CODE (ops[1]) == INTEGER_CST)
    4648           12 :         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
    4649              : 
    4650           46 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4651              :                                                           IFN_SAT_ADD, type_out,
    4652              :                                                           lhs, ops[0], ops[1]);
    4653           46 :       if (stmt)
    4654              :         {
    4655           28 :           vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
    4656           28 :           return stmt;
    4657              :         }
    4658              :     }
    4659              : 
    4660              :   return NULL;
    4661              : }
    4662              : 
    4663              : /*
    4664              :  * Try to transform the truncation for .SAT_SUB pattern,  mostly occurs in
    4665              :  * the benchmark zip.  Aka:
    4666              :  *
    4667              :  *   unsigned int _1;
    4668              :  *   unsigned int _2;
    4669              :  *   unsigned short int _4;
    4670              :  *   _9 = (unsigned short int).SAT_SUB (_1, _2);
    4671              :  *
    4672              :  *   if _1 is known to be in the range of unsigned short int.  For example
    4673              :  *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
    4674              :  *   truncation to:
    4675              :  *
    4676              :  *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
    4677              :  *   _9 = .SAT_SUB (_4, _3);
    4678              :  *
    4679              :  *   Then,  we can better vectorized code and avoid the unnecessary narrowing
    4680              :  *   stmt during vectorization with below stmt(s).
    4681              :  *
    4682              :  *   _3 = .SAT_TRUNC(_2); // SI => HI
    4683              :  *   _9 = .SAT_SUB (_4, _3);
    4684              :  */
    4685              : static void
    4686          216 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
    4687              :                                       stmt_vec_info stmt_vinfo,
    4688              :                                       tree lhs, tree *ops)
    4689              : {
    4690          216 :   tree otype = TREE_TYPE (lhs);
    4691          216 :   tree itype = TREE_TYPE (ops[0]);
    4692          216 :   unsigned itype_prec = TYPE_PRECISION (itype);
    4693          216 :   unsigned otype_prec = TYPE_PRECISION (otype);
    4694              : 
    4695          216 :   if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
    4696          216 :     return;
    4697              : 
    4698            0 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4699            0 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4700            0 :   tree_pair v_pair = tree_pair (v_otype, v_itype);
    4701              : 
    4702            0 :   if (v_otype == NULL_TREE || v_itype == NULL_TREE
    4703            0 :     || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
    4704              :                                         OPTIMIZE_FOR_BOTH))
    4705            0 :     return;
    4706              : 
    4707              :   /* 1. Find the _4 and update ops[0] as above example.  */
    4708            0 :   vect_unpromoted_value unprom;
    4709            0 :   tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
    4710              : 
    4711            0 :   if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
    4712              :     return;
    4713              : 
    4714            0 :   ops[0] = tmp;
    4715              : 
    4716              :   /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
    4717            0 :   tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4718            0 :   gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
    4719              : 
    4720            0 :   gimple_call_set_lhs (call, trunc_lhs_ssa);
    4721            0 :   gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4722            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
    4723              : 
    4724            0 :   ops[1] = trunc_lhs_ssa;
    4725              : }
    4726              : 
    4727              : /*
    4728              :  * Try to detect saturation sub pattern (SAT_ADD), aka below gimple:
    4729              :  * Unsigned:
    4730              :  *   _7 = _1 >= _2;
    4731              :  *   _8 = _1 - _2;
    4732              :  *   _10 = (long unsigned int) _7;
    4733              :  *   _9 = _8 * _10;
    4734              :  *
    4735              :  * And then simplied to
    4736              :  *   _9 = .SAT_SUB (_1, _2);
    4737              :  *
    4738              :  * Signed:
    4739              :  *   x.0_4 = (unsigned char) x_16;
    4740              :  *   y.1_5 = (unsigned char) y_18;
    4741              :  *   _6 = x.0_4 - y.1_5;
    4742              :  *   minus_19 = (int8_t) _6;
    4743              :  *   _7 = x_16 ^ y_18;
    4744              :  *   _8 = x_16 ^ minus_19;
    4745              :  *   _44 = _7 < 0;
    4746              :  *   _23 = x_16 < 0;
    4747              :  *   _24 = (signed char) _23;
    4748              :  *   _58 = (unsigned char) _24;
    4749              :  *   _59 = -_58;
    4750              :  *   _25 = (signed char) _59;
    4751              :  *   _26 = _25 ^ 127;
    4752              :  *   _42 = _8 < 0;
    4753              :  *   _41 = _42 & _44;
    4754              :  *   iftmp.2_11 = _41 ? _26 : minus_19;
    4755              :  *
    4756              :  * And then simplied to
    4757              :  *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
    4758              :  */
    4759              : 
    4760              : static gimple *
    4761     30378400 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4762              :                             tree *type_out)
    4763              : {
    4764     30378400 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4765              : 
    4766     30378400 :   if (!is_gimple_assign (last_stmt))
    4767              :     return NULL;
    4768              : 
    4769     20633488 :   tree ops[2];
    4770     20633488 :   tree lhs = gimple_assign_lhs (last_stmt);
    4771              : 
    4772     20633488 :   if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
    4773     20633488 :       || gimple_signed_integer_sat_sub (lhs, ops, NULL))
    4774              :     {
    4775          216 :       vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
    4776          216 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4777              :                                                           IFN_SAT_SUB, type_out,
    4778              :                                                           lhs, ops[0], ops[1]);
    4779          216 :       if (stmt)
    4780              :         {
    4781           31 :           vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
    4782           31 :           return stmt;
    4783              :         }
    4784              :     }
    4785              : 
    4786              :   return NULL;
    4787              : }
    4788              : 
    4789              : /*
    4790              :  * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
    4791              :  *   overflow_5 = x_4(D) > 4294967295;
    4792              :  *   _1 = (unsigned int) x_4(D);
    4793              :  *   _2 = (unsigned int) overflow_5;
    4794              :  *   _3 = -_2;
    4795              :  *   _6 = _1 | _3;
    4796              :  *
    4797              :  * And then simplied to
    4798              :  *   _6 = .SAT_TRUNC (x_4(D));
    4799              :  */
    4800              : 
    4801              : static gimple *
    4802     30378369 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4803              :                               tree *type_out)
    4804              : {
    4805     30378369 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4806              : 
    4807     30378369 :   if (!is_gimple_assign (last_stmt))
    4808              :     return NULL;
    4809              : 
    4810     20633457 :   tree ops[1];
    4811     20633457 :   tree lhs = gimple_assign_lhs (last_stmt);
    4812     20633457 :   tree otype = TREE_TYPE (lhs);
    4813              : 
    4814     20633457 :   if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
    4815     20633457 :        && type_has_mode_precision_p (otype))
    4816              :     {
    4817            8 :       tree itype = TREE_TYPE (ops[0]);
    4818            8 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4819            8 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4820            8 :       internal_fn fn = IFN_SAT_TRUNC;
    4821              : 
    4822            8 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4823           16 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4824              :                                            OPTIMIZE_FOR_BOTH))
    4825              :         {
    4826            0 :           tree temp = vect_recog_temp_ssa_var (itype, NULL);
    4827            0 :           gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
    4828            0 :           append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
    4829              : 
    4830            0 :           gcall *call = gimple_build_call_internal (fn, 1, temp);
    4831            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4832              : 
    4833            0 :           gimple_call_set_lhs (call, out_ssa);
    4834            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4835            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4836              : 
    4837            0 :           *type_out = v_otype;
    4838              : 
    4839            0 :           return call;
    4840              :         }
    4841              : 
    4842              :     }
    4843              : 
    4844     20633457 :   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    4845     20633183 :        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
    4846     20633457 :       && type_has_mode_precision_p (otype))
    4847              :     {
    4848          262 :       tree itype = TREE_TYPE (ops[0]);
    4849          262 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4850          262 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4851          262 :       internal_fn fn = IFN_SAT_TRUNC;
    4852              : 
    4853          256 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4854          518 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4855              :                                            OPTIMIZE_FOR_BOTH))
    4856              :         {
    4857            0 :           gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
    4858            0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4859              : 
    4860            0 :           gimple_call_set_lhs (call, out_ssa);
    4861            0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4862            0 :           gimple_set_location (call, gimple_location (last_stmt));
    4863              : 
    4864            0 :           *type_out = v_otype;
    4865              : 
    4866            0 :           return call;
    4867              :         }
    4868              :     }
    4869              : 
    4870              :   return NULL;
    4871              : }
    4872              : 
    4873              : 
    4874              : /* Function add_code_for_floorceilround_divmod
    4875              :    A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
    4876              :    FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
    4877              :    ROUND_DIV_EXPR
    4878              :    The quotient and remainder are needed for implemented these operators.
    4879              :    FLOOR cases
    4880              :    r = x %[fl] y; r = x/[fl] y;
    4881              :    is
    4882              :    r = x % y; if (r && (x ^ y) < 0) r += y;
    4883              :    r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
    4884              :    Produce following sequence
    4885              :    v0 = x^y
    4886              :    v1 = -r
    4887              :    v2 = r | -r
    4888              :    v3 = v0 & v2
    4889              :    v4 = v3 < 0
    4890              :    if (floor_mod)
    4891              :      v5 = v4 ? y : 0
    4892              :      v6 = r + v5
    4893              :    if (floor_div)
    4894              :      v5 = v4 ? 1 : 0
    4895              :      v6 = d - 1
    4896              :    Similar sequences of vector instructions are produces for following cases
    4897              :    CEIL cases
    4898              :    r = x %[cl] y; r = x/[cl] y;
    4899              :    is
    4900              :    r = x % y; if (r && (x ^ y) >= 0) r -= y;
    4901              :    r = x % y; if (r) r -= y; (unsigned)
    4902              :    r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
    4903              :    r = x % y; d = x/y; if (r) d++; (unsigned)
    4904              :    ROUND cases
    4905              :    r = x %[rd] y; r = x/[rd] y;
    4906              :    is
    4907              :    r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
    4908              :    r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
    4909              :    r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
    4910              :    r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
    4911              :    Inputs:
    4912              :      VECTYPE: Vector type of the operands
    4913              :      STMT_VINFO: Statement where pattern begins
    4914              :      RHS_CODE: Should either be FLOOR_MOD_EXPR or FLOOR_DIV_EXPR
    4915              :      Q: The quotient of division
    4916              :      R: Remainder of division
    4917              :      OPRDN0/OPRND1: Actual operands involved
    4918              :      ITYPE: tree type of oprnd0
    4919              :    Output:
    4920              :      NULL if vectorization not possible
    4921              :      Gimple statement based on rhs_code
    4922              : */
    4923              : static gimple *
    4924          431 : add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
    4925              :                                     stmt_vec_info stmt_vinfo,
    4926              :                                     enum tree_code rhs_code, tree q, tree r,
    4927              :                                     tree oprnd0, tree oprnd1, tree itype)
    4928              : {
    4929          431 :   gimple *def_stmt;
    4930          431 :   tree mask_vectype = truth_type_for (vectype);
    4931          431 :   if (!mask_vectype)
    4932              :     return NULL;
    4933          431 :   tree bool_cond;
    4934          431 :   bool unsigned_p = TYPE_UNSIGNED (itype);
    4935              : 
    4936          431 :   switch (rhs_code)
    4937              :     {
    4938          395 :     case FLOOR_MOD_EXPR:
    4939          395 :     case FLOOR_DIV_EXPR:
    4940          395 :     case CEIL_MOD_EXPR:
    4941          395 :     case CEIL_DIV_EXPR:
    4942          395 :       {
    4943          395 :         if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
    4944          363 :             || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
    4945          363 :             || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
    4946          363 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    4947          363 :             || !target_has_vecop_for_code (MINUS_EXPR, vectype)
    4948          363 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    4949          631 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    4950          159 :           return NULL;
    4951          236 :         if (unsigned_p)
    4952              :           {
    4953           18 :             gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4954              : 
    4955           18 :             if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
    4956              :               return NULL;
    4957           18 :             bool is_mod = rhs_code == CEIL_MOD_EXPR;
    4958              :             // r > 0
    4959           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    4960           18 :             def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
    4961              :                                             build_int_cst (itype, 0));
    4962           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    4963              :                                     itype);
    4964              : 
    4965              :             // (r > 0) ? y : 0 (mod)
    4966              :             // (r > 0) ? 1 : 0 (ceil)
    4967           18 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    4968           18 :             def_stmt
    4969           27 :               = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    4970            9 :                                      is_mod ? oprnd1 : build_int_cst (itype, 1),
    4971              :                                      build_int_cst (itype, 0));
    4972           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4973              : 
    4974              :             // r -= (r > 0) ? y : 0 (mod)
    4975              :             // d += (x^y < 0 && r) ? -1 : 0 (ceil)
    4976           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    4977           27 :             return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
    4978           18 :                                         is_mod ? r : q, extr_cond);
    4979              :           }
    4980              :         else
    4981              :           {
    4982          218 :             bool ceil_p
    4983          218 :               = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
    4984          218 :             if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
    4985              :               return NULL;
    4986              :             // x ^ y
    4987          218 :             tree xort = vect_recog_temp_ssa_var (itype, NULL);
    4988          218 :             def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
    4989          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4990              : 
    4991          218 :             tree cond_reg = xort;
    4992              :             // ~(x ^ y) (ceil)
    4993          218 :             if (ceil_p)
    4994              :               {
    4995           18 :                 cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    4996           18 :                 def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
    4997           18 :                 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4998              :               }
    4999              : 
    5000              :             // -r
    5001          218 :             tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
    5002          218 :             def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
    5003          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5004              : 
    5005              :             // r | -r , sign bit is set if r!=0
    5006          218 :             tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
    5007          218 :             def_stmt
    5008          218 :               = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
    5009          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5010              : 
    5011              :             // (x ^ y) & (r | -r)
    5012              :             // ~(x ^ y) & (r | -r) (ceil)
    5013          218 :             tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
    5014          218 :             def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
    5015              :                                             r_or_negr, cond_reg);
    5016          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5017              : 
    5018              :             // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
    5019          218 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5020          218 :             def_stmt
    5021          218 :               = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
    5022              :                                      build_int_cst (itype, 0));
    5023          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5024              :                                     itype);
    5025              : 
    5026              :             // (x^y < 0 && r) ? y : 0 (mod)
    5027              :             // (x^y < 0 && r) ? -1 : 0 (div)
    5028          218 :             bool is_mod
    5029          218 :               = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
    5030          218 :             tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
    5031          258 :             def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
    5032              :                                             is_mod ? oprnd1
    5033           40 :                                                    : build_int_cst (itype, -1),
    5034              :                                             build_int_cst (itype, 0));
    5035          218 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5036              : 
    5037              :             // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
    5038              :             // d += (x^y < 0 && r) ? -1 : 0 (floor div)
    5039              :             // r -= (x ^ y < 0 && r) ? y : 0 (ceil mod)
    5040              :             // d -= (x^y < 0 && r) ? -1 : 0 (ceil div)
    5041          218 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5042          436 :             return gimple_build_assign (result,
    5043          218 :                                         (rhs_code == FLOOR_MOD_EXPR
    5044          218 :                                          || rhs_code == FLOOR_DIV_EXPR)
    5045              :                                           ? PLUS_EXPR
    5046              :                                           : MINUS_EXPR,
    5047          218 :                                         is_mod ? r : q, extr_cond);
    5048              :           }
    5049              :       }
    5050           36 :     case ROUND_MOD_EXPR:
    5051           36 :     case ROUND_DIV_EXPR:
    5052           36 :       {
    5053           36 :         if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
    5054           36 :             || !target_has_vecop_for_code (PLUS_EXPR, vectype)
    5055           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
    5056           36 :             || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
    5057           72 :             || !expand_vec_cond_expr_p (vectype, mask_vectype))
    5058            0 :           return NULL;
    5059              : 
    5060           36 :         bool is_mod = rhs_code == ROUND_MOD_EXPR;
    5061           36 :         HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5062           36 :         unsigned HOST_WIDE_INT abs_d
    5063              :           = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
    5064           36 :         unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
    5065           36 :         if (!unsigned_p)
    5066              :           {
    5067              :             // check availibility of abs expression for vector
    5068           18 :             if (!target_has_vecop_for_code (ABS_EXPR, vectype))
    5069              :               return NULL;
    5070              :             // abs (r)
    5071           18 :             tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
    5072           18 :             def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
    5073           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5074              : 
    5075              :             // abs (r) > (abs (y-1) >> 1)
    5076           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5077           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
    5078           18 :                                             build_int_cst (itype, mid_d));
    5079           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5080              :                                     itype);
    5081              : 
    5082              :             // x ^ y
    5083           18 :             tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
    5084           18 :             def_stmt
    5085           18 :               = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
    5086           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5087              : 
    5088              :             // x ^ y < 0
    5089           18 :             bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5090           18 :             def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
    5091              :                                             build_int_cst (itype, 0));
    5092           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5093              :                                     itype);
    5094              : 
    5095              :             // x ^ y < 0 ? y : -y (mod)
    5096              :             // x ^ y < 0 ? -1 : 1 (div)
    5097           18 :             tree val1 = vect_recog_temp_ssa_var (itype, NULL);
    5098           18 :             def_stmt
    5099           36 :               = gimple_build_assign (val1, COND_EXPR, bool_cond,
    5100           27 :                                      build_int_cst (itype, is_mod ? d : -1),
    5101           18 :                                      build_int_cst (itype, is_mod ? -d : 1));
    5102           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5103           18 :             int precision = TYPE_PRECISION (itype);
    5104           18 :             wide_int wmask = wi::mask (precision, false, precision);
    5105              : 
    5106              :             // abs (r) > (abs (y-1) >> 1) ? 0xffffffff : 0
    5107           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5108           36 :             def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
    5109           18 :                                             wide_int_to_tree (itype, wmask),
    5110              :                                             build_int_cst (itype, 0));
    5111           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5112              : 
    5113           18 :             tree fval = vect_recog_temp_ssa_var (itype, NULL);
    5114           18 :             def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
    5115           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5116              : 
    5117           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5118           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5119              :                                         fval);
    5120           18 :           }
    5121              :         else
    5122              :           {
    5123              :             // r > (y-1 >> 1)
    5124           18 :             tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5125           18 :             def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
    5126           18 :                                             build_int_cst (itype, mid_d));
    5127           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
    5128              :                                     itype);
    5129              : 
    5130              :             // (r > (y-1)>>1) ? -d : 1
    5131           18 :             tree val2 = vect_recog_temp_ssa_var (itype, NULL);
    5132           18 :             def_stmt
    5133           36 :               = gimple_build_assign (val2, COND_EXPR, round_p,
    5134           18 :                                      build_int_cst (itype, is_mod ? -d : 1),
    5135              :                                      build_int_cst (itype, 0));
    5136           18 :             append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5137              : 
    5138           18 :             tree result = vect_recog_temp_ssa_var (itype, NULL);
    5139           27 :             return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
    5140           18 :                                         val2);
    5141              :           }
    5142              :       }
    5143              :     default:
    5144              :       return NULL;
    5145              :     }
    5146              : }
    5147              : 
    5148              : /* Detect a signed division by a constant that wouldn't be
    5149              :    otherwise vectorized:
    5150              : 
    5151              :    type a_t, b_t;
    5152              : 
    5153              :    S1 a_t = b_t / N;
    5154              : 
    5155              :   where type 'type' is an integral type and N is a constant.
    5156              : 
    5157              :   Similarly handle modulo by a constant:
    5158              : 
    5159              :    S4 a_t = b_t % N;
    5160              : 
    5161              :   Input/Output:
    5162              : 
    5163              :   * STMT_VINFO: The stmt from which the pattern search begins,
    5164              :     i.e. the division stmt.  S1 is replaced by if N is a power
    5165              :     of two constant and type is signed:
    5166              :   S3  y_t = b_t < 0 ? N - 1 : 0;
    5167              :   S2  x_t = b_t + y_t;
    5168              :   S1' a_t = x_t >> log2 (N);
    5169              : 
    5170              :     S4 is replaced if N is a power of two constant and
    5171              :     type is signed by (where *_T temporaries have unsigned type):
    5172              :   S9  y_T = b_t < 0 ? -1U : 0U;
    5173              :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    5174              :   S7  z_t = (type) z_T;
    5175              :   S6  w_t = b_t + z_t;
    5176              :   S5  x_t = w_t & (N - 1);
    5177              :   S4' a_t = x_t - z_t;
    5178              : 
    5179              :   Output:
    5180              : 
    5181              :   * TYPE_OUT: The type of the output of this pattern.
    5182              : 
    5183              :   * Return value: A new stmt that will be used to replace the division
    5184              :     S1 or modulo S4 stmt.  */
    5185              : 
    5186              : static gimple *
    5187     30125215 : vect_recog_divmod_pattern (vec_info *vinfo,
    5188              :                            stmt_vec_info stmt_vinfo, tree *type_out)
    5189              : {
    5190     30125215 :   gimple *last_stmt = stmt_vinfo->stmt;
    5191     30125215 :   tree oprnd0, oprnd1, vectype, itype, cond;
    5192     30125215 :   gimple *pattern_stmt = NULL;
    5193     30125215 :   gimple *def_stmt = NULL;
    5194     30125215 :   enum tree_code rhs_code;
    5195     30125215 :   optab optab;
    5196     30125215 :   tree q, cst;
    5197     30125215 :   int prec;
    5198              : 
    5199     30125215 :   if (!is_gimple_assign (last_stmt)
    5200              :       /* The pattern will disrupt the reduction chain with multiple uses.  */
    5201     30125215 :       || vect_is_reduction (stmt_vinfo))
    5202              :     return NULL;
    5203              : 
    5204     20300751 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5205     20300751 :   switch (rhs_code)
    5206              :     {
    5207       269892 :     case TRUNC_DIV_EXPR:
    5208       269892 :     case EXACT_DIV_EXPR:
    5209       269892 :     case TRUNC_MOD_EXPR:
    5210       269892 :     case FLOOR_MOD_EXPR:
    5211       269892 :     case FLOOR_DIV_EXPR:
    5212       269892 :     case CEIL_MOD_EXPR:
    5213       269892 :     case CEIL_DIV_EXPR:
    5214       269892 :     case ROUND_MOD_EXPR:
    5215       269892 :     case ROUND_DIV_EXPR:
    5216       269892 :       break;
    5217              :     default:
    5218              :       return NULL;
    5219              :     }
    5220              : 
    5221       269892 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5222       269892 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5223       269892 :   itype = TREE_TYPE (oprnd0);
    5224       269892 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5225       252284 :       || TREE_CODE (oprnd1) != INTEGER_CST
    5226       158901 :       || TREE_CODE (itype) != INTEGER_TYPE
    5227       428793 :       || !type_has_mode_precision_p (itype))
    5228       110991 :     return NULL;
    5229              : 
    5230       158901 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    5231       158901 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5232       158901 :   if (vectype == NULL_TREE)
    5233              :     return NULL;
    5234              : 
    5235       127340 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    5236              :     {
    5237              :       /* If the target can handle vectorized division or modulo natively,
    5238              :          don't attempt to optimize this, since native division is likely
    5239              :          to give smaller code.  */
    5240         2213 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    5241         2213 :       if (optab != unknown_optab
    5242         2213 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    5243              :         return NULL;
    5244              :     }
    5245              : 
    5246       127340 :   prec = TYPE_PRECISION (itype);
    5247              : 
    5248       254680 :   bool is_flclrd_moddiv_p
    5249       127340 :     = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
    5250              :     || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
    5251       126755 :     || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
    5252       127340 :   if (integer_pow2p (oprnd1))
    5253              :     {
    5254        75487 :       if ((TYPE_UNSIGNED (itype)
    5255           57 :            && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
    5256        75541 :           || tree_int_cst_sgn (oprnd1) != 1)
    5257            3 :         return NULL;
    5258              : 
    5259              :       /* Pattern detected.  */
    5260        75484 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5261              : 
    5262        75484 :       *type_out = vectype;
    5263              : 
    5264              :       /* Check if the target supports this internal function.  */
    5265        75484 :       internal_fn ifn = IFN_DIV_POW2;
    5266        75484 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    5267              :         {
    5268            0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    5269              : 
    5270            0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    5271            0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    5272            0 :           gimple_call_set_lhs (div_stmt, var_div);
    5273            0 :           if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5274              :             {
    5275            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    5276            0 :               tree t1 = vect_recog_temp_ssa_var (itype, NULL);
    5277            0 :               def_stmt
    5278            0 :                 = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
    5279            0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5280            0 :               pattern_stmt
    5281            0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5282              :                                        MINUS_EXPR, oprnd0, t1);
    5283            0 :               if (is_flclrd_moddiv_p)
    5284              :                 {
    5285            0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5286            0 :                   pattern_stmt
    5287            0 :                     = add_code_for_floorceilround_divmod (vectype, vinfo,
    5288              :                                                           stmt_vinfo, rhs_code,
    5289              :                                                           var_div, t1, oprnd0,
    5290              :                                                           oprnd1, itype);
    5291            0 :                   if (pattern_stmt == NULL)
    5292              :                     return NULL;
    5293              :                 }
    5294              :             }
    5295              :           else
    5296              :             pattern_stmt = div_stmt;
    5297            0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    5298              : 
    5299            0 :           return pattern_stmt;
    5300              :         }
    5301              : 
    5302        75484 :       cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5303        75484 :       def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
    5304              :                                       build_int_cst (itype, 0));
    5305        75484 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
    5306              :                               truth_type_for (vectype), itype);
    5307        75484 :       tree div_result = NULL_TREE;
    5308        75484 :       if (rhs_code == TRUNC_DIV_EXPR
    5309        75484 :           || rhs_code == EXACT_DIV_EXPR
    5310              :           || rhs_code == FLOOR_DIV_EXPR
    5311         2724 :           || rhs_code == CEIL_DIV_EXPR
    5312         2565 :           || rhs_code == ROUND_DIV_EXPR)
    5313              :         {
    5314        72931 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    5315        72931 :           tree shift;
    5316        72931 :           def_stmt
    5317        72931 :             = gimple_build_assign (var, COND_EXPR, cond,
    5318              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5319              :                                                 build_int_cst (itype, 1)),
    5320              :                                    build_int_cst (itype, 0));
    5321        72931 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5322        72931 :           var = vect_recog_temp_ssa_var (itype, NULL);
    5323        72931 :           def_stmt
    5324        72931 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    5325              :                                    gimple_assign_lhs (def_stmt));
    5326        72931 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5327              : 
    5328        72931 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    5329        72931 :           div_result = vect_recog_temp_ssa_var (itype, NULL);
    5330        72931 :           pattern_stmt
    5331        72931 :             = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
    5332              :         }
    5333        75484 :       if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5334              :         {
    5335         2724 :           if (rhs_code == FLOOR_DIV_EXPR
    5336              :               || rhs_code == CEIL_DIV_EXPR
    5337         2724 :               || rhs_code == ROUND_DIV_EXPR)
    5338          171 :             append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5339              : 
    5340         2724 :           tree signmask;
    5341         2724 :           if (compare_tree_int (oprnd1, 2) == 0)
    5342              :             {
    5343         1338 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5344         1338 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    5345              :                                               build_int_cst (itype, 1),
    5346              :                                               build_int_cst (itype, 0));
    5347         1338 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5348              :             }
    5349              :           else
    5350              :             {
    5351         1386 :               tree utype
    5352         1386 :                 = build_nonstandard_integer_type (prec, 1);
    5353         1386 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    5354         1386 :               tree shift
    5355         1386 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    5356         1386 :                                         - tree_log2 (oprnd1));
    5357         1386 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    5358              : 
    5359         1386 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    5360              :                                               build_int_cst (utype, -1),
    5361              :                                               build_int_cst (utype, 0));
    5362         1386 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5363         1386 :               var = vect_recog_temp_ssa_var (utype, NULL);
    5364         1386 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    5365              :                                               gimple_assign_lhs (def_stmt),
    5366              :                                               shift);
    5367         1386 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    5368         1386 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    5369         1386 :               def_stmt
    5370         1386 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    5371         1386 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5372              :             }
    5373         2724 :           def_stmt
    5374         2724 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5375              :                                    PLUS_EXPR, oprnd0, signmask);
    5376         2724 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5377         2724 :           def_stmt
    5378         2724 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5379              :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    5380              :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    5381              :                                                 build_int_cst (itype, 1)));
    5382         2724 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5383              : 
    5384         2724 :           tree r = vect_recog_temp_ssa_var (itype, NULL);
    5385         2724 :           pattern_stmt
    5386         2724 :             = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
    5387              :                                    signmask);
    5388         2724 :           if (is_flclrd_moddiv_p)
    5389              :             {
    5390          285 :               append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5391          285 :               pattern_stmt
    5392          285 :                 = add_code_for_floorceilround_divmod (vectype, vinfo,
    5393              :                                                       stmt_vinfo, rhs_code,
    5394              :                                                       div_result, r, oprnd0,
    5395              :                                                       oprnd1, itype);
    5396          285 :               if (pattern_stmt == NULL)
    5397              :                 return NULL;
    5398              :             }
    5399              :         }
    5400              : 
    5401        75325 :       return pattern_stmt;
    5402              :     }
    5403              : 
    5404        51853 :   if ((cst = uniform_integer_cst_p (oprnd1))
    5405        51853 :       && TYPE_UNSIGNED (itype)
    5406              :       && rhs_code == TRUNC_DIV_EXPR
    5407        31560 :       && vectype
    5408        71737 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    5409              :     {
    5410              :       /* We can use the relationship:
    5411              : 
    5412              :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    5413              : 
    5414              :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    5415              :          is therefore a shift right.  When operating in modes that are
    5416              :          multiples of a byte in size, there are two cases:
    5417              : 
    5418              :          (1) N(N+3) is not representable, in which case the question
    5419              :              becomes whether the replacement expression overflows.
    5420              :              It is enough to test that x+N+2 does not overflow,
    5421              :              i.e. that x < MAX-(N+1).
    5422              : 
    5423              :          (2) N(N+3) is representable, in which case it is the (only)
    5424              :              bound that we need to check.
    5425              : 
    5426              :          ??? For now we just handle the case where // (N+1) is a shift
    5427              :          right by half the precision, since some architectures can
    5428              :          optimize the associated addition and shift combinations
    5429              :          into single instructions.  */
    5430              : 
    5431        13587 :       auto wcst = wi::to_wide (cst);
    5432        13587 :       int pow = wi::exact_log2 (wcst + 1);
    5433        13587 :       if (pow == prec / 2)
    5434              :         {
    5435          468 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    5436              : 
    5437          468 :           gimple_ranger ranger;
    5438          468 :           int_range_max r;
    5439              : 
    5440              :           /* Check that no overflow will occur.  If we don't have range
    5441              :              information we can't perform the optimization.  */
    5442              : 
    5443          468 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    5444              :             {
    5445          466 :               wide_int max = r.upper_bound ();
    5446          466 :               wide_int one = wi::shwi (1, prec);
    5447          466 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    5448          466 :               wi::overflow_type ovf;
    5449          466 :               wi::add (max, adder, UNSIGNED, &ovf);
    5450          466 :               if (ovf == wi::OVF_NONE)
    5451              :                 {
    5452          304 :                   *type_out = vectype;
    5453          304 :                   tree tadder = wide_int_to_tree (itype, adder);
    5454          304 :                   tree rshift = wide_int_to_tree (itype, pow);
    5455              : 
    5456          304 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    5457          304 :                   gassign *patt1
    5458          304 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    5459          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5460              : 
    5461          304 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    5462          304 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    5463              :                                                rshift);
    5464          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5465              : 
    5466          304 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    5467          304 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    5468              :                                                oprnd0);
    5469          304 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5470              : 
    5471          304 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    5472          304 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    5473              :                                                       new_lhs3, rshift);
    5474              : 
    5475          304 :                   return pattern_stmt;
    5476              :                 }
    5477          466 :             }
    5478          468 :         }
    5479              :     }
    5480              : 
    5481        51549 :   if (prec > HOST_BITS_PER_WIDE_INT
    5482        51549 :       || integer_zerop (oprnd1))
    5483          418 :     return NULL;
    5484              : 
    5485        51131 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    5486              :     return NULL;
    5487              : 
    5488        14421 :   if (TYPE_UNSIGNED (itype))
    5489              :     {
    5490         9324 :       unsigned HOST_WIDE_INT mh, ml;
    5491         9324 :       int pre_shift, post_shift;
    5492         9324 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    5493         9324 :                                   & GET_MODE_MASK (itype_mode));
    5494         9324 :       tree t1, t2, t3, t4;
    5495              : 
    5496         9324 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    5497              :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    5498           23 :         return NULL;
    5499              : 
    5500              :       /* Find a suitable multiplier and right shift count instead of
    5501              :          directly dividing by D.  */
    5502         9301 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
    5503              : 
    5504              :       /* If the suggested multiplier is more than PREC bits, we can do better
    5505              :          for even divisors, using an initial right shift.  */
    5506         9301 :       if (mh != 0 && (d & 1) == 0)
    5507              :         {
    5508          246 :           pre_shift = ctz_or_zero (d);
    5509          246 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    5510              :                                   &ml, &post_shift);
    5511          246 :           gcc_assert (!mh);
    5512              :         }
    5513              :       else
    5514              :         pre_shift = 0;
    5515              : 
    5516          782 :       if (mh != 0)
    5517              :         {
    5518          782 :           if (post_shift - 1 >= prec)
    5519              :             return NULL;
    5520              : 
    5521              :           /* t1 = oprnd0 h* ml;
    5522              :              t2 = oprnd0 - t1;
    5523              :              t3 = t2 >> 1;
    5524              :              t4 = t1 + t3;
    5525              :              q = t4 >> (post_shift - 1);  */
    5526          782 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    5527          782 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5528          782 :                                           build_int_cst (itype, ml));
    5529          782 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5530              : 
    5531          782 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5532          782 :           def_stmt
    5533          782 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    5534          782 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5535              : 
    5536          782 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5537          782 :           def_stmt
    5538          782 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    5539          782 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5540              : 
    5541          782 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5542          782 :           def_stmt
    5543          782 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    5544              : 
    5545          782 :           if (post_shift != 1)
    5546              :             {
    5547          782 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5548              : 
    5549          782 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5550          782 :               pattern_stmt
    5551          782 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    5552          782 :                                        build_int_cst (itype, post_shift - 1));
    5553              :             }
    5554              :           else
    5555              :             {
    5556              :               q = t4;
    5557              :               pattern_stmt = def_stmt;
    5558              :             }
    5559              :         }
    5560              :       else
    5561              :         {
    5562         8519 :           if (pre_shift >= prec || post_shift >= prec)
    5563              :             return NULL;
    5564              : 
    5565              :           /* t1 = oprnd0 >> pre_shift;
    5566              :              t2 = t1 h* ml;
    5567              :              q = t2 >> post_shift;  */
    5568         8519 :           if (pre_shift)
    5569              :             {
    5570          246 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    5571          246 :               def_stmt
    5572          246 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    5573          246 :                                        build_int_cst (NULL, pre_shift));
    5574          246 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5575              :             }
    5576              :           else
    5577              :             t1 = oprnd0;
    5578              : 
    5579         8519 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5580         8519 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    5581         8519 :                                           build_int_cst (itype, ml));
    5582              : 
    5583         8519 :           if (post_shift)
    5584              :             {
    5585         8509 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5586              : 
    5587         8509 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5588         8509 :               def_stmt
    5589         8509 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    5590         8509 :                                        build_int_cst (itype, post_shift));
    5591              :             }
    5592              :           else
    5593              :             q = t2;
    5594              : 
    5595              :           pattern_stmt = def_stmt;
    5596              :         }
    5597              :     }
    5598              :   else
    5599              :     {
    5600         5097 :       unsigned HOST_WIDE_INT ml;
    5601         5097 :       int post_shift;
    5602         5097 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5603         5097 :       unsigned HOST_WIDE_INT abs_d;
    5604         5097 :       bool add = false;
    5605         5097 :       tree t1, t2, t3, t4;
    5606              : 
    5607              :       /* Give up for -1.  */
    5608         5097 :       if (d == -1)
    5609            0 :         return NULL;
    5610              : 
    5611              :       /* Since d might be INT_MIN, we have to cast to
    5612              :          unsigned HOST_WIDE_INT before negating to avoid
    5613              :          undefined signed overflow.  */
    5614         5097 :       abs_d = (d >= 0
    5615         5097 :                ? (unsigned HOST_WIDE_INT) d
    5616              :                : - (unsigned HOST_WIDE_INT) d);
    5617              : 
    5618              :       /* n rem d = n rem -d */
    5619         5097 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    5620              :         {
    5621            0 :           d = abs_d;
    5622            0 :           oprnd1 = build_int_cst (itype, abs_d);
    5623              :         }
    5624         5097 :       if (HOST_BITS_PER_WIDE_INT >= prec
    5625         5097 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    5626              :         /* This case is not handled correctly below.  */
    5627              :         return NULL;
    5628              : 
    5629         5097 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
    5630         5097 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    5631              :         {
    5632         1572 :           add = true;
    5633         1572 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    5634              :         }
    5635         5097 :       if (post_shift >= prec)
    5636              :         return NULL;
    5637              : 
    5638              :       /* t1 = oprnd0 h* ml;  */
    5639         5097 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5640         5097 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5641         5097 :                                       build_int_cst (itype, ml));
    5642              : 
    5643         5097 :       if (add)
    5644              :         {
    5645              :           /* t2 = t1 + oprnd0;  */
    5646         1572 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5647         1572 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5648         1572 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    5649              :         }
    5650              :       else
    5651              :         t2 = t1;
    5652              : 
    5653         5097 :       if (post_shift)
    5654              :         {
    5655              :           /* t3 = t2 >> post_shift;  */
    5656         4324 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5657         4324 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5658         4324 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    5659         4324 :                                           build_int_cst (itype, post_shift));
    5660              :         }
    5661              :       else
    5662              :         t3 = t2;
    5663              : 
    5664         5097 :       int msb = 1;
    5665         5097 :       int_range_max r;
    5666        10194 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    5667         5097 :       if (!r.varying_p () && !r.undefined_p ())
    5668              :         {
    5669         2914 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    5670              :             msb = 0;
    5671          709 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    5672              :             msb = -1;
    5673              :         }
    5674              : 
    5675         2205 :       if (msb == 0 && d >= 0)
    5676              :         {
    5677              :           /* q = t3;  */
    5678              :           q = t3;
    5679              :           pattern_stmt = def_stmt;
    5680              :         }
    5681              :       else
    5682              :         {
    5683              :           /* t4 = oprnd0 >> (prec - 1);
    5684              :              or if we know from VRP that oprnd0 >= 0
    5685              :              t4 = 0;
    5686              :              or if we know from VRP that oprnd0 < 0
    5687              :              t4 = -1;  */
    5688         2952 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5689         2952 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5690         2952 :           if (msb != 1)
    5691           68 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5692           68 :                                             build_int_cst (itype, msb));
    5693              :           else
    5694         2884 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5695         2884 :                                             build_int_cst (itype, prec - 1));
    5696         2952 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5697              : 
    5698              :           /* q = t3 - t4;  or q = t4 - t3;  */
    5699         2952 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5700         5724 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5701              :                                               d < 0 ? t3 : t4);
    5702              :         }
    5703         5097 :     }
    5704              : 
    5705        14398 :   if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
    5706              :     {
    5707         6587 :       tree r, t1;
    5708              : 
    5709              :       /* We divided.  Now finish by:
    5710              :          t1 = q * oprnd1;
    5711              :          r = oprnd0 - t1;  */
    5712         6587 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5713              : 
    5714         6587 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5715         6587 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5716         6587 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5717              : 
    5718         6587 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5719         6587 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5720              : 
    5721         6587 :       if (is_flclrd_moddiv_p)
    5722              :         {
    5723          146 :         append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5724          146 :         pattern_stmt
    5725          146 :           = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
    5726              :                                                 rhs_code, q, r, oprnd0, oprnd1,
    5727              :                                                 itype);
    5728          146 :         if (pattern_stmt == NULL)
    5729              :           return NULL;
    5730              :         }
    5731              :     }
    5732              : 
    5733              :   /* Pattern detected.  */
    5734        14398 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5735              : 
    5736        14398 :   *type_out = vectype;
    5737        14398 :   return pattern_stmt;
    5738              : }
    5739              : 
    5740              : /* Detects pattern with a modulo operation (S1) where both arguments
    5741              :    are variables of integral type.
    5742              :    The statement is replaced by division, multiplication, and subtraction.
    5743              :    The last statement (S4) is returned.
    5744              : 
    5745              :    Example:
    5746              :    S1 c_t = a_t % b_t;
    5747              : 
    5748              :    is replaced by
    5749              :    S2 x_t = a_t / b_t;
    5750              :    S3 y_t = x_t * b_t;
    5751              :    S4 z_t = a_t - y_t;  */
    5752              : 
    5753              : static gimple *
    5754     30309333 : vect_recog_mod_var_pattern (vec_info *vinfo,
    5755              :                             stmt_vec_info stmt_vinfo, tree *type_out)
    5756              : {
    5757     30309333 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5758     30309333 :   tree oprnd0, oprnd1, vectype, itype;
    5759     30309333 :   gimple *pattern_stmt, *def_stmt;
    5760     30309333 :   enum tree_code rhs_code;
    5761              : 
    5762     30309333 :   if (!is_gimple_assign (last_stmt) || vect_is_reduction (stmt_vinfo))
    5763              :     return NULL;
    5764              : 
    5765     20484869 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5766     20484869 :   if (rhs_code != TRUNC_MOD_EXPR)
    5767              :     return NULL;
    5768              : 
    5769        67901 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5770        67901 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5771        67901 :   itype = TREE_TYPE (oprnd0);
    5772        67901 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5773        59554 :       || TREE_CODE (oprnd1) != SSA_NAME
    5774        43568 :       || TREE_CODE (itype) != INTEGER_TYPE)
    5775              :     return NULL;
    5776              : 
    5777        43441 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5778              : 
    5779        43441 :   if (!vectype
    5780        35477 :       || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
    5781        35477 :       || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
    5782            0 :       || !target_has_vecop_for_code (MULT_EXPR, vectype)
    5783        43441 :       || !target_has_vecop_for_code (MINUS_EXPR, vectype))
    5784        43441 :     return NULL;
    5785              : 
    5786            0 :   tree q, tmp, r;
    5787            0 :   q = vect_recog_temp_ssa_var (itype, NULL);
    5788            0 :   def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
    5789            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5790              : 
    5791            0 :   tmp = vect_recog_temp_ssa_var (itype, NULL);
    5792            0 :   def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
    5793            0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5794              : 
    5795            0 :   r = vect_recog_temp_ssa_var (itype, NULL);
    5796            0 :   pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
    5797              : 
    5798              :   /* Pattern detected.  */
    5799            0 :   *type_out = vectype;
    5800            0 :   vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
    5801              : 
    5802            0 :   return pattern_stmt;
    5803              : }
    5804              : 
    5805              : 
    5806              : /* Return the proper type for converting bool VAR into
    5807              :    an integer value or NULL_TREE if no such type exists.
    5808              :    The type is chosen so that the converted value has the
    5809              :    same number of elements as VAR's vector type.  */
    5810              : 
    5811              : static tree
    5812      4332849 : integer_type_for_mask (tree var, vec_info *vinfo, vect_def_type *dt = nullptr)
    5813              : {
    5814      4332849 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5815              :     return NULL_TREE;
    5816              : 
    5817      1960061 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (var);
    5818      1960061 :   if (dt)
    5819              :     {
    5820       339358 :       if (!def_stmt_info)
    5821         2620 :         *dt = vect_external_def;
    5822              :       else
    5823       336738 :         *dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
    5824              :     }
    5825       339358 :   if (!def_stmt_info
    5826      1876309 :       || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
    5827      3497012 :       || !vect_use_mask_type_p (def_stmt_info))
    5828       765316 :     return NULL_TREE;
    5829              : 
    5830      1194745 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5831              : }
    5832              : 
    5833              : /* Function vect_recog_gcond_pattern
    5834              : 
    5835              :    Try to find pattern like following:
    5836              : 
    5837              :      if (a op b)
    5838              : 
    5839              :    where operator 'op' is not != and convert it to an adjusted boolean pattern
    5840              : 
    5841              :      mask = a op b
    5842              :      if (mask != 0)
    5843              : 
    5844              :    and set the mask type on MASK.
    5845              : 
    5846              :    Input:
    5847              : 
    5848              :    * STMT_VINFO: The stmt at the end from which the pattern
    5849              :                  search begins, i.e. cast of a bool to
    5850              :                  an integer type.
    5851              : 
    5852              :    Output:
    5853              : 
    5854              :    * TYPE_OUT: The type of the output of this pattern.
    5855              : 
    5856              :    * Return value: A new stmt that will be used to replace the pattern.  */
    5857              : 
    5858              : static gimple *
    5859     30378369 : vect_recog_gcond_pattern (vec_info *vinfo,
    5860              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5861              : {
    5862              :   /* Currently we only support this for loop vectorization and when multiple
    5863              :      exits.  */
    5864     30378369 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5865      3798765 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5866              :     return NULL;
    5867              : 
    5868      1659319 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5869      1659319 :   gcond* cond = NULL;
    5870     30401727 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5871              :     return NULL;
    5872              : 
    5873       381953 :   auto lhs = gimple_cond_lhs (cond);
    5874       381953 :   auto rhs = gimple_cond_rhs (cond);
    5875       381953 :   auto code = gimple_cond_code (cond);
    5876              : 
    5877       381953 :   tree scalar_type = TREE_TYPE (lhs);
    5878       381953 :   if (VECTOR_TYPE_P (scalar_type))
    5879              :     return NULL;
    5880              : 
    5881              :   /* If the input is a boolean then try to figure out the precision that the
    5882              :      vector type should use.  We cannot use the scalar precision as this would
    5883              :      later mismatch.  This is similar to what recog_bool does.  */
    5884       381953 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5885              :     {
    5886        10226 :       if (tree stype = integer_type_for_mask (lhs, vinfo))
    5887       381953 :         scalar_type = stype;
    5888              :     }
    5889              : 
    5890       381953 :   tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
    5891       381953 :   if (vectype == NULL_TREE)
    5892              :     return NULL;
    5893              : 
    5894       358595 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5895       358595 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5896       358595 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5897              : 
    5898       358595 :   gimple *pattern_stmt
    5899       358595 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5900       358595 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5901              :                          NULL_TREE, NULL_TREE);
    5902       358595 :   *type_out = vectype;
    5903       358595 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5904       358595 :   return pattern_stmt;
    5905              : }
    5906              : 
    5907              : 
    5908              : /* A helper for vect_recog_mask_conversion_pattern.  Build
    5909              :    conversion of MASK to a type suitable for masking VECTYPE.
    5910              :    Built statement gets required vectype and is appended to
    5911              :    a pattern sequence of STMT_VINFO.
    5912              : 
    5913              :    Return converted mask.  */
    5914              : 
    5915              : static tree
    5916       118241 : build_mask_conversion (vec_info *vinfo,
    5917              :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5918              : {
    5919       118241 :   gimple *stmt;
    5920       118241 :   tree masktype, tmp;
    5921              : 
    5922       118241 :   masktype = truth_type_for (vectype);
    5923       118241 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5924       118241 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5925       118241 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5926       118241 :                           stmt, masktype, TREE_TYPE (vectype));
    5927              : 
    5928       118241 :   return tmp;
    5929              : }
    5930              : 
    5931              : 
    5932              : /* Return MASK if MASK is suitable for masking an operation on vectors
    5933              :    of type VECTYPE, otherwise convert it into such a form and return
    5934              :    the result.  Associate any conversion statements with STMT_INFO's
    5935              :    pattern.  */
    5936              : 
    5937              : static tree
    5938        69450 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    5939              :                                stmt_vec_info stmt_info, vec_info *vinfo)
    5940              : {
    5941        69450 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    5942        69450 :   if (mask_type)
    5943              :     {
    5944        69450 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    5945        69450 :       if (mask_vectype
    5946       138900 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    5947        83556 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    5948        55344 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    5949              :     }
    5950        69450 :   return mask;
    5951              : }
    5952              : 
    5953              : 
    5954              : /* Function vect_recog_bool_pattern
    5955              : 
    5956              :    Try to find pattern like following:
    5957              : 
    5958              :      bool a_b, b_b, c_b, d_b, e_b;
    5959              :      TYPE f_T;
    5960              :    loop:
    5961              :      S1  a_b = x1 CMP1 y1;
    5962              :      S2  b_b = x2 CMP2 y2;
    5963              :      S3  c_b = a_b & b_b;
    5964              :      S4  d_b = x3 CMP3 y3;
    5965              :      S5  e_b = c_b | d_b;
    5966              :      S6  f_T = (TYPE) e_b;
    5967              : 
    5968              :    where type 'TYPE' is an integral type.  Or a similar pattern
    5969              :    ending in
    5970              : 
    5971              :      S6  f_Y = e_b ? r_Y : s_Y;
    5972              : 
    5973              :    as results from if-conversion of a complex condition.
    5974              : 
    5975              :    Input:
    5976              : 
    5977              :    * STMT_VINFO: The stmt at the end from which the pattern
    5978              :                  search begins, i.e. cast of a bool to
    5979              :                  an integer type.
    5980              : 
    5981              :    Output:
    5982              : 
    5983              :    * TYPE_OUT: The type of the output of this pattern.
    5984              : 
    5985              :    * Return value: A new stmt that will be used to replace the pattern.
    5986              : 
    5987              :         Assuming size of TYPE is the same as size of all comparisons
    5988              :         (otherwise some casts would be added where needed), the above
    5989              :         sequence we create related pattern stmts:
    5990              :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5991              :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5992              :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5993              :         S5'  e_T = c_T | d_T;
    5994              :         S6'  f_T = e_T;
    5995              : 
    5996              :         Instead of the above S3' we could emit:
    5997              :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5998              :         S3'  c_T = a_T | b_T;
    5999              :         but the above is more efficient.  */
    6000              : 
    6001              : static gimple *
    6002     30378369 : vect_recog_bool_pattern (vec_info *vinfo,
    6003              :                          stmt_vec_info stmt_vinfo, tree *type_out)
    6004              : {
    6005     30378369 :   gimple *last_stmt = stmt_vinfo->stmt;
    6006     30378369 :   enum tree_code rhs_code;
    6007     30378369 :   tree var, lhs, rhs, vectype;
    6008     30378369 :   gimple *pattern_stmt;
    6009              : 
    6010     30378369 :   if (!is_gimple_assign (last_stmt))
    6011              :     return NULL;
    6012              : 
    6013     20992052 :   var = gimple_assign_rhs1 (last_stmt);
    6014     20992052 :   lhs = gimple_assign_lhs (last_stmt);
    6015     20992052 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6016              : 
    6017     20992052 :   if (rhs_code == VIEW_CONVERT_EXPR)
    6018       188412 :     var = TREE_OPERAND (var, 0);
    6019              : 
    6020     20992052 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    6021              :     return NULL;
    6022              : 
    6023       718217 :   hash_set<gimple *> bool_stmts;
    6024              : 
    6025       718217 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    6026              :       || rhs_code == VIEW_CONVERT_EXPR
    6027              :       || rhs_code == FLOAT_EXPR)
    6028              :     {
    6029       176955 :       if (! (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    6030         2235 :              || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
    6031       175254 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6032              :         return NULL;
    6033        83017 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6034              : 
    6035        83017 :       tree type = integer_type_for_mask (var, vinfo);
    6036        83017 :       tree cst0, cst1, tmp;
    6037              : 
    6038        83017 :       if (!type)
    6039              :         return NULL;
    6040              : 
    6041              :       /* We may directly use cond with narrowed type to avoid multiple cond
    6042              :          exprs with following result packing and perform single cond with
    6043              :          packed mask instead.  In case of widening we better make cond first
    6044              :          and then extract results.  */
    6045        40906 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    6046        28309 :         type = TREE_TYPE (lhs);
    6047              : 
    6048        40906 :       cst0 = build_int_cst (type, 0);
    6049        40906 :       cst1 = build_int_cst (type, 1);
    6050        40906 :       tmp = vect_recog_temp_ssa_var (type, NULL);
    6051        40906 :       pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    6052              : 
    6053        40906 :       if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    6054              :         {
    6055        12597 :           tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    6056        12597 :           append_pattern_def_seq (vinfo, stmt_vinfo,
    6057              :                                   pattern_stmt, new_vectype);
    6058              : 
    6059        12597 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6060        12597 :           pattern_stmt
    6061        24880 :             = gimple_build_assign (lhs, (rhs_code == FLOAT_EXPR
    6062              :                                          ? FLOAT_EXPR : CONVERT_EXPR), tmp);
    6063              :         }
    6064              : 
    6065        40906 :       *type_out = vectype;
    6066        40906 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6067              : 
    6068        40906 :       return pattern_stmt;
    6069              :     }
    6070              :   else if (rhs_code == COND_EXPR
    6071       195883 :            && TREE_CODE (var) == SSA_NAME)
    6072              :     {
    6073       195883 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6074       195883 :       if (vectype == NULL_TREE)
    6075              :         return NULL;
    6076              : 
    6077              :       /* Build a scalar type for the boolean result that when
    6078              :          vectorized matches the vector type of the result in
    6079              :          size and number of elements.  */
    6080       182058 :       unsigned prec
    6081       182058 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    6082              :                                TYPE_VECTOR_SUBPARTS (vectype));
    6083              : 
    6084       182058 :       tree type
    6085       364116 :         = build_nonstandard_integer_type (prec,
    6086       182058 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    6087       182058 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    6088              :         return NULL;
    6089              : 
    6090       182058 :       enum vect_def_type dt;
    6091       182058 :       if (integer_type_for_mask (var, vinfo))
    6092              :         return NULL;
    6093        31919 :       else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
    6094        31919 :                && vect_is_simple_use (var, vinfo, &dt)
    6095        31919 :                && (dt == vect_external_def
    6096        31912 :                    || dt == vect_constant_def))
    6097              :         {
    6098              :           /* If the condition is already a boolean then manually convert it to a
    6099              :              mask of the given integer type but don't set a vectype.  */
    6100         1325 :           tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
    6101         1325 :           pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
    6102              :                                               build_all_ones_cst (type),
    6103              :                                               build_zero_cst (type));
    6104         1325 :           append_inv_pattern_def_seq (vinfo, pattern_stmt);
    6105         1325 :           var = lhs_ivar;
    6106              :         }
    6107              : 
    6108        31919 :       tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6109        31919 :       pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
    6110        31919 :                                           build_zero_cst (TREE_TYPE (var)));
    6111              : 
    6112        31919 :       tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
    6113        31919 :       if (!new_vectype)
    6114              :         return NULL;
    6115              : 
    6116        31919 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
    6117        31919 :                               TREE_TYPE (var));
    6118              : 
    6119        31919 :       lhs_var = vect_convert_mask_for_vectype (lhs_var, vectype, stmt_vinfo,
    6120              :                                                vinfo);
    6121              : 
    6122        31919 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6123        31919 :       pattern_stmt
    6124        31919 :         = gimple_build_assign (lhs, COND_EXPR, lhs_var,
    6125              :                                gimple_assign_rhs2 (last_stmt),
    6126              :                                gimple_assign_rhs3 (last_stmt));
    6127        31919 :       *type_out = vectype;
    6128        31919 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6129              : 
    6130        31919 :       return pattern_stmt;
    6131              :     }
    6132       433506 :   else if ((rhs_code == BIT_XOR_EXPR
    6133              :             || rhs_code == BIT_AND_EXPR
    6134       433506 :             || rhs_code == BIT_IOR_EXPR)
    6135       330172 :            && TREE_CODE (var) == SSA_NAME)
    6136              :     {
    6137       330172 :       tree rhs2 = gimple_assign_rhs2 (last_stmt);
    6138       330172 :       if (TREE_CODE (rhs2) != SSA_NAME)
    6139              :         return NULL;
    6140       330172 :       tree lhs_type = integer_type_for_mask (lhs, vinfo);
    6141       330172 :       if (!lhs_type)
    6142              :         return NULL;
    6143       169679 :       vectype = get_mask_type_for_scalar_type (vinfo, lhs_type);
    6144       169679 :       if (!vectype)
    6145              :         return NULL;
    6146       169679 :       vect_def_type dt1, dt2;
    6147       169679 :       tree rhs1_type = integer_type_for_mask (var, vinfo, &dt1);
    6148       169679 :       tree rhs2_type = integer_type_for_mask (rhs2, vinfo, &dt2);
    6149       169679 :       if ((rhs1_type || dt1 == vect_external_def)
    6150       155610 :           && (rhs2_type || dt2 == vect_external_def))
    6151              :         return NULL;
    6152              :       /* When one input is a mask and the other is not create a pattern
    6153              :          stmt sequence that creates a mask for the non-mask input and
    6154              :          convert it to one suitable for the output mask used.  */
    6155        33150 :       if (rhs1_type && !rhs2_type)
    6156              :         {
    6157        19081 :           tree rhs1_vectype = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6158        19081 :           if (!rhs1_vectype)
    6159              :             return NULL;
    6160        19081 :           tree rhs2_vectype = get_vectype_for_scalar_type (vinfo,
    6161        19081 :                                                            TREE_TYPE (rhs2));
    6162        19081 :           if (!rhs2_vectype)
    6163              :             return NULL;
    6164        19081 :           tree new_vectype = truth_type_for (rhs2_vectype);
    6165        19081 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6166        19081 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, rhs2,
    6167              :                                               build_zero_cst
    6168        19081 :                                                 (TREE_TYPE (rhs2)));
    6169        19081 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6170        19081 :                                   new_vectype, TREE_TYPE (new_vectype));
    6171        19081 :           rhs2 = vect_convert_mask_for_vectype (tem, rhs1_vectype,
    6172              :                                                 stmt_vinfo, vinfo);
    6173              :         }
    6174        14069 :       else if (!rhs1_type && rhs2_type)
    6175              :         {
    6176        14069 :           tree rhs2_vectype = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6177        14069 :           if (!rhs2_vectype)
    6178              :             return NULL;
    6179        14069 :           tree rhs1_vectype = get_vectype_for_scalar_type (vinfo,
    6180        14069 :                                                            TREE_TYPE (var));
    6181        14069 :           if (!rhs1_vectype)
    6182              :             return NULL;
    6183        14069 :           tree new_vectype = truth_type_for (rhs1_vectype);
    6184        14069 :           tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
    6185        14069 :           pattern_stmt = gimple_build_assign (tem, NE_EXPR, var,
    6186              :                                               build_zero_cst
    6187        14069 :                                                 (TREE_TYPE (var)));
    6188        14069 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    6189        14069 :                                   new_vectype, TREE_TYPE (new_vectype));
    6190        14069 :           var = vect_convert_mask_for_vectype (tem, rhs2_vectype,
    6191              :                                                stmt_vinfo, vinfo);
    6192              :         }
    6193        33150 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6194        33150 :       pattern_stmt = gimple_build_assign (lhs, rhs_code, var, rhs2);
    6195        33150 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6196        33150 :       *type_out = vectype;
    6197        33150 :       return pattern_stmt;
    6198              :     }
    6199       103334 :   else if (rhs_code == SSA_NAME
    6200        27924 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    6201              :     {
    6202         7633 :       stmt_vec_info pattern_stmt_info;
    6203         7633 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6204         7633 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    6205            0 :         return NULL;
    6206              : 
    6207         7633 :       tree type = integer_type_for_mask (var, vinfo);
    6208         7633 :       if (!type)
    6209              :         return NULL;
    6210              : 
    6211         4381 :       var = vect_convert_mask_for_vectype (var, vectype, stmt_vinfo, vinfo);
    6212              : 
    6213         4381 :       tree cst0 = build_int_cst (TREE_TYPE (vectype), 0);
    6214         4381 :       tree cst1 = build_int_cst (TREE_TYPE (vectype), 1);
    6215         4381 :       rhs = vect_recog_temp_ssa_var (TREE_TYPE (vectype), NULL);
    6216         4381 :       pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    6217         4381 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype);
    6218              : 
    6219         4381 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    6220         4381 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    6221         4381 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6222         4381 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6223         4381 :       *type_out = vectype;
    6224         4381 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    6225              : 
    6226         4381 :       return pattern_stmt;
    6227              :     }
    6228              :   else
    6229              :     return NULL;
    6230       718217 : }
    6231              : 
    6232              : 
    6233              : /* Function vect_recog_mask_conversion_pattern
    6234              : 
    6235              :    Try to find statements which require boolean type
    6236              :    converison.  Additional conversion statements are
    6237              :    added to handle such cases.  For example:
    6238              : 
    6239              :    bool m_1, m_2, m_3;
    6240              :    int i_4, i_5;
    6241              :    double d_6, d_7;
    6242              :    char c_1, c_2, c_3;
    6243              : 
    6244              :    S1   m_1 = i_4 > i_5;
    6245              :    S2   m_2 = d_6 < d_7;
    6246              :    S3   m_3 = m_1 & m_2;
    6247              :    S4   c_1 = m_3 ? c_2 : c_3;
    6248              : 
    6249              :    Will be transformed into:
    6250              : 
    6251              :    S1   m_1 = i_4 > i_5;
    6252              :    S2   m_2 = d_6 < d_7;
    6253              :    S3'' m_2' = (_Bool[bitsize=32])m_2
    6254              :    S3'  m_3' = m_1 & m_2';
    6255              :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    6256              :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    6257              : 
    6258              : static gimple *
    6259     30405527 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    6260              :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    6261              : {
    6262     30405527 :   gimple *last_stmt = stmt_vinfo->stmt;
    6263     30405527 :   enum tree_code rhs_code;
    6264     30405527 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    6265     30405527 :   tree vectype1, vectype2;
    6266     30405527 :   stmt_vec_info pattern_stmt_info;
    6267              : 
    6268              :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    6269              :      conversion.  */
    6270     30405527 :   if (is_gimple_call (last_stmt)
    6271     30405527 :       && gimple_call_internal_p (last_stmt))
    6272              :     {
    6273       103289 :       gcall *pattern_stmt;
    6274              : 
    6275       103289 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    6276       103289 :       int mask_argno = internal_fn_mask_index (ifn);
    6277       103289 :       if (mask_argno < 0)
    6278              :         return NULL;
    6279              : 
    6280         9954 :       bool store_p = internal_store_fn_p (ifn);
    6281         9954 :       bool load_p = internal_store_fn_p (ifn);
    6282         9954 :       if (store_p)
    6283              :         {
    6284         1811 :           int rhs_index = internal_fn_stored_value_index (ifn);
    6285         1811 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    6286         1811 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    6287              :         }
    6288              :       else
    6289              :         {
    6290         8143 :           lhs = gimple_call_lhs (last_stmt);
    6291         8143 :           if (!lhs)
    6292              :             return NULL;
    6293         8143 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6294              :         }
    6295              : 
    6296         9954 :       if (!vectype1)
    6297              :         return NULL;
    6298              : 
    6299         9736 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    6300         9736 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    6301         9736 :       if (mask_arg_type)
    6302              :         {
    6303         8567 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    6304              : 
    6305         8567 :           if (!vectype2
    6306         8567 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6307              :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    6308         5206 :             return NULL;
    6309              :         }
    6310         1169 :       else if (store_p || load_p)
    6311              :         return NULL;
    6312              : 
    6313         4273 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    6314              : 
    6315         4273 :       auto_vec<tree, 8> args;
    6316         4273 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    6317         4273 :       args.safe_grow (nargs, true);
    6318        21365 :       for (unsigned int i = 0; i < nargs; ++i)
    6319        17092 :         args[i] = ((int) i == mask_argno
    6320        17092 :                    ? tmp
    6321        12819 :                    : gimple_call_arg (last_stmt, i));
    6322         4273 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    6323              : 
    6324         4273 :       if (!store_p)
    6325              :         {
    6326         4001 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6327         4001 :           gimple_call_set_lhs (pattern_stmt, lhs);
    6328              :         }
    6329              : 
    6330         4001 :       if (load_p || store_p)
    6331          272 :         gimple_call_set_nothrow (pattern_stmt, true);
    6332              : 
    6333         4273 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    6334         4273 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    6335         1749 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6336              : 
    6337         4273 :       *type_out = vectype1;
    6338         4273 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6339              : 
    6340         4273 :       return pattern_stmt;
    6341         4273 :     }
    6342              : 
    6343     30302238 :   if (!is_gimple_assign (last_stmt))
    6344              :     return NULL;
    6345              : 
    6346     21019210 :   gimple *pattern_stmt;
    6347     21019210 :   lhs = gimple_assign_lhs (last_stmt);
    6348     21019210 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    6349     21019210 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    6350              : 
    6351              :   /* Check for cond expression requiring mask conversion.  */
    6352     21019210 :   if (rhs_code == COND_EXPR)
    6353              :     {
    6354       181085 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    6355              : 
    6356       181085 :       gcc_assert (! COMPARISON_CLASS_P (rhs1));
    6357       181085 :       if (TREE_CODE (rhs1) == SSA_NAME)
    6358              :         {
    6359       181085 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6360       181085 :           if (!rhs1_type)
    6361              :             return NULL;
    6362              :         }
    6363              :       else
    6364              :         return NULL;
    6365              : 
    6366       169327 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6367              : 
    6368       169327 :       if (!vectype1 || !vectype2)
    6369              :         return NULL;
    6370              : 
    6371              :       /* Continue if a conversion is needed.  Also continue if we have
    6372              :          a comparison whose vector type would normally be different from
    6373              :          VECTYPE2 when considered in isolation.  In that case we'll
    6374              :          replace the comparison with an SSA name (so that we can record
    6375              :          its vector type) and behave as though the comparison was an SSA
    6376              :          name from the outset.  */
    6377       167240 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6378              :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6379              :         return NULL;
    6380              : 
    6381        42530 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    6382        85060 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6383        42530 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6384              :       else
    6385              :         tmp = rhs1;
    6386              : 
    6387        42530 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6388        42530 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    6389              :                                           gimple_assign_rhs2 (last_stmt),
    6390              :                                           gimple_assign_rhs3 (last_stmt));
    6391              : 
    6392        42530 :       *type_out = vectype1;
    6393        42530 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6394              : 
    6395        42530 :       return pattern_stmt;
    6396              :     }
    6397              : 
    6398              :   /* Now check for binary boolean operations requiring conversion for
    6399              :      one of operands.  */
    6400     20838125 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6401              :     return NULL;
    6402              : 
    6403      1744344 :   if (rhs_code != BIT_IOR_EXPR
    6404              :       && rhs_code != BIT_XOR_EXPR
    6405      1744344 :       && rhs_code != BIT_AND_EXPR
    6406      1447322 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    6407              :     return NULL;
    6408              : 
    6409      1560057 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    6410              : 
    6411      1560057 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6412      1560057 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    6413              : 
    6414      1560057 :   if (!rhs1_type || !rhs2_type
    6415      1560057 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    6416              :     return NULL;
    6417              : 
    6418        16094 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    6419              :     {
    6420        10119 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6421        10119 :       if (!vectype1)
    6422              :         return NULL;
    6423        10119 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    6424              :     }
    6425              :   else
    6426              :     {
    6427         5975 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6428         5975 :       if (!vectype1)
    6429              :         return NULL;
    6430         5975 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6431              :     }
    6432              : 
    6433        16094 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6434        16094 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    6435              : 
    6436        16094 :   *type_out = vectype1;
    6437        16094 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6438              : 
    6439        16094 :   return pattern_stmt;
    6440              : }
    6441              : 
    6442              : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    6443              :    the boolean condition under which it occurs, otherwise return null.  */
    6444              : 
    6445              : static tree
    6446       105374 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    6447              : {
    6448       105374 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    6449              :     {
    6450       104044 :       gcc_assert (gimple_assign_single_p (def_assign));
    6451              :       return NULL_TREE;
    6452              :     }
    6453              : 
    6454         1330 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    6455              :     {
    6456         1330 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    6457         1330 :       int mask_index = internal_fn_mask_index (ifn);
    6458         1330 :       return gimple_call_arg (def_call, mask_index);
    6459              :     }
    6460              : 
    6461            0 :   gcc_unreachable ();
    6462              : }
    6463              : 
    6464              : /* Return the equivalent of:
    6465              : 
    6466              :      fold_convert (TYPE, VALUE)
    6467              : 
    6468              :    with the expectation that the operation will be vectorized.
    6469              :    If new statements are needed, add them as pattern statements
    6470              :    to STMT_INFO.  */
    6471              : 
    6472              : static tree
    6473            0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    6474              :                                 tree type, tree value, stmt_vec_info stmt_info)
    6475              : {
    6476            0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    6477              :     return value;
    6478              : 
    6479            0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    6480            0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    6481            0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,
    6482              :                           get_vectype_for_scalar_type (vinfo, type));
    6483            0 :   return new_value;
    6484              : }
    6485              : 
    6486              : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    6487              :    internal function.  Return the final statement on success and set
    6488              :    *TYPE_OUT to the vector type being loaded or stored.
    6489              : 
    6490              :    This function only handles gathers and scatters that were recognized
    6491              :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
    6492              : 
    6493              : static gimple *
    6494     30405527 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    6495              :                                    stmt_vec_info stmt_info, tree *type_out)
    6496              : {
    6497              :   /* Currently we only support this for loop vectorization.  */
    6498     34215687 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6499      3810160 :   if (!loop_vinfo)
    6500              :     return NULL;
    6501              : 
    6502              :   /* Make sure that we're looking at a gather load or scatter store.  */
    6503      3810160 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    6504      3810160 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    6505              :     return NULL;
    6506              : 
    6507              :   /* Get the boolean that controls whether the load or store happens.
    6508              :      This is null if the operation is unconditional.  */
    6509       105374 :   tree mask = vect_get_load_store_mask (stmt_info);
    6510              : 
    6511              :   /* DR analysis nailed down the vector type for the access.  */
    6512       105374 :   tree gs_vectype = STMT_VINFO_VECTYPE (stmt_info);
    6513              : 
    6514              :   /* Make sure that the target supports an appropriate internal
    6515              :      function for the gather/scatter operation.  */
    6516       105374 :   gather_scatter_info gs_info;
    6517       105374 :   if (!vect_check_gather_scatter (stmt_info, gs_vectype, loop_vinfo, &gs_info)
    6518       105374 :       || gs_info.ifn == IFN_LAST)
    6519              :     return NULL;
    6520              : 
    6521              :   /* Convert the mask to the right form.  */
    6522            0 :   if (mask)
    6523            0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    6524              :                                           loop_vinfo);
    6525            0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6526            0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6527            0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6528            0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6529            0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
    6530              : 
    6531              :   /* Get the invariant base and non-invariant offset, converting the
    6532              :      latter to the same width as the vector elements.  */
    6533            0 :   tree base = gs_info.base;
    6534            0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6535            0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6536              :                                                 gs_info.offset, stmt_info);
    6537              : 
    6538              :   /* Build the new pattern statement.  */
    6539            0 :   tree scale = size_int (gs_info.scale);
    6540            0 :   gcall *pattern_stmt;
    6541              : 
    6542            0 :   if (DR_IS_READ (dr))
    6543              :     {
    6544            0 :       tree zero = build_zero_cst (gs_info.element_type);
    6545            0 :       if (mask != NULL)
    6546              :         {
    6547            0 :           int elsval = MASK_LOAD_ELSE_ZERO;
    6548              : 
    6549            0 :           tree vec_els
    6550            0 :             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
    6551            0 :           pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
    6552              :                                                      gs_info.alias_ptr,
    6553              :                                                      offset, scale, zero, mask,
    6554              :                                                      vec_els);
    6555              :         }
    6556              :       else
    6557            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
    6558              :                                                    gs_info.alias_ptr,
    6559              :                                                    offset, scale, zero);
    6560            0 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
    6561            0 :       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6562            0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6563              :     }
    6564              :   else
    6565              :     {
    6566            0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6567            0 :       if (mask != NULL)
    6568            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
    6569              :                                                    base, gs_info.alias_ptr,
    6570              :                                                    offset, scale, rhs, mask);
    6571              :       else
    6572            0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
    6573              :                                                    base, gs_info.alias_ptr,
    6574              :                                                    offset, scale, rhs);
    6575              :     }
    6576            0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6577              : 
    6578              :   /* Copy across relevant vectorization info and associate DR with the
    6579              :      new pattern statement instead of the original statement.  */
    6580            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6581            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6582              : 
    6583            0 :   *type_out = gs_vectype;
    6584            0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6585              : 
    6586            0 :   return pattern_stmt;
    6587              : }
    6588              : 
    6589              : /* Helper method of vect_recog_cond_store_pattern,  checks to see if COND_ARG
    6590              :    is points to a load statement that reads the same data as that of
    6591              :    STORE_VINFO.  */
    6592              : 
    6593              : static bool
    6594        22826 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
    6595              :                                   stmt_vec_info store_vinfo, tree cond_arg)
    6596              : {
    6597        22826 :   stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
    6598        22826 :   if (!load_stmt_vinfo
    6599        13374 :       || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
    6600         7623 :       || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
    6601        30449 :       || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
    6602              :                           STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    6603        17825 :     return false;
    6604              : 
    6605              :   return true;
    6606              : }
    6607              : 
    6608              : /* Function vect_recog_cond_store_pattern
    6609              : 
    6610              :    Try to find the following pattern:
    6611              : 
    6612              :    x = *_3;
    6613              :    c = a CMP b;
    6614              :    y = c ? t_20 : x;
    6615              :    *_3 = y;
    6616              : 
    6617              :    where the store of _3 happens on a conditional select on a value loaded
    6618              :    from the same location.  In such case we can elide the initial load if
    6619              :    MASK_STORE is supported and instead only conditionally write out the result.
    6620              : 
    6621              :    The pattern produces for the above:
    6622              : 
    6623              :    c = a CMP b;
    6624              :    .MASK_STORE (_3, c, t_20)
    6625              : 
    6626              :    Input:
    6627              : 
    6628              :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    6629              :    example, when this function is called with _3 then the search begins.
    6630              : 
    6631              :    Output:
    6632              : 
    6633              :    * TYPE_OUT: The type of the output  of this pattern.
    6634              : 
    6635              :    * Return value: A new stmt that will be used to replace the sequence.  */
    6636              : 
    6637              : static gimple *
    6638     30405527 : vect_recog_cond_store_pattern (vec_info *vinfo,
    6639              :                                stmt_vec_info stmt_vinfo, tree *type_out)
    6640              : {
    6641     30405527 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6642      3810160 :   if (!loop_vinfo)
    6643              :     return NULL;
    6644              : 
    6645      3810160 :   gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
    6646              : 
    6647              :   /* Needs to be a gimple store where we have DR info for.  */
    6648      3810160 :   if (!STMT_VINFO_DATA_REF (stmt_vinfo)
    6649       908769 :       || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
    6650      4114506 :       || !gimple_store_p (store_stmt))
    6651      3507500 :     return NULL;
    6652              : 
    6653       302660 :   tree st_rhs = gimple_assign_rhs1 (store_stmt);
    6654              : 
    6655       302660 :   if (TREE_CODE (st_rhs) != SSA_NAME)
    6656              :     return NULL;
    6657              : 
    6658       235453 :   auto cond_vinfo = vinfo->lookup_def (st_rhs);
    6659              : 
    6660              :   /* If the condition isn't part of the loop then bool recog wouldn't have seen
    6661              :      it and so this transformation may not be valid.  */
    6662       235453 :   if (!cond_vinfo)
    6663              :     return NULL;
    6664              : 
    6665       221226 :   cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
    6666     30616114 :   gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
    6667       277153 :   if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    6668              :     return NULL;
    6669              : 
    6670              :   /* Check if the else value matches the original loaded one.  */
    6671        12040 :   bool invert = false;
    6672        12040 :   tree cmp_ls = gimple_arg (cond_stmt, 0);
    6673        12040 :   if (TREE_CODE (cmp_ls) != SSA_NAME)
    6674              :     return NULL;
    6675              : 
    6676        12040 :   tree cond_arg1 = gimple_arg (cond_stmt, 1);
    6677        12040 :   tree cond_arg2 = gimple_arg (cond_stmt, 2);
    6678              : 
    6679        12040 :   if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
    6680        12040 :       && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
    6681              :                                                       cond_arg1)))
    6682              :     return NULL;
    6683              : 
    6684         5001 :   vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
    6685              : 
    6686         5001 :   tree scalar_type = TREE_TYPE (st_rhs);
    6687         5001 :   if (VECTOR_TYPE_P (scalar_type))
    6688              :     return NULL;
    6689              : 
    6690         5001 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6691         5001 :   if (vectype == NULL_TREE)
    6692              :     return NULL;
    6693              : 
    6694         5001 :   machine_mode mask_mode;
    6695         5001 :   machine_mode vecmode = TYPE_MODE (vectype);
    6696         1066 :   if (!VECTOR_MODE_P (vecmode)
    6697         5001 :       || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
    6698            0 :       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    6699         5001 :       || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    6700         5001 :     return NULL;
    6701              : 
    6702            0 :   tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
    6703            0 :   if (may_be_nonaddressable_p (base))
    6704              :     return NULL;
    6705              : 
    6706              :   /* We need to use the false parameter of the conditional select.  */
    6707            0 :   tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
    6708            0 :   tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
    6709            0 :   gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
    6710              : 
    6711              :   /* This is a rough estimation to check that there aren't any aliasing stores
    6712              :      in between the load and store.  It's a bit strict, but for now it's good
    6713              :      enough.  */
    6714            0 :   if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    6715              :     return NULL;
    6716              : 
    6717              :   /* If we have to invert the condition, i.e. use the true argument rather than
    6718              :      the false argument, we have to negate the mask.  */
    6719            0 :   if (invert)
    6720              :     {
    6721            0 :       tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6722              : 
    6723              :       /* Invert the mask using ^ 1.  */
    6724            0 :       tree itype = TREE_TYPE (cmp_ls);
    6725            0 :       gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
    6726              :                                            build_int_cst (itype, 1));
    6727              : 
    6728            0 :       tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
    6729            0 :       append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
    6730            0 :       cmp_ls= var;
    6731              :     }
    6732              : 
    6733            0 :   if (TREE_CODE (base) != MEM_REF)
    6734            0 :    base = build_fold_addr_expr (base);
    6735              : 
    6736            0 :   tree ptr = build_int_cst (reference_alias_ptr_type (base),
    6737            0 :                             get_object_alignment (base));
    6738              : 
    6739              :   /* Convert the mask to the right form.  */
    6740            0 :   tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
    6741              :                                              vinfo);
    6742              : 
    6743            0 :   gcall *call
    6744            0 :     = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
    6745              :                                   cond_store_arg);
    6746            0 :   gimple_set_location (call, gimple_location (store_stmt));
    6747              : 
    6748              :   /* Copy across relevant vectorization info and associate DR with the
    6749              :      new pattern statement instead of the original statement.  */
    6750            0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
    6751            0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6752              : 
    6753            0 :   *type_out = vectype;
    6754            0 :   return call;
    6755              : }
    6756              : 
    6757              : /* Return true if TYPE is a non-boolean integer type.  These are the types
    6758              :    that we want to consider for narrowing.  */
    6759              : 
    6760              : static bool
    6761     60425861 : vect_narrowable_type_p (tree type)
    6762              : {
    6763     60425861 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6764              : }
    6765              : 
    6766              : /* Return true if the operation given by CODE can be truncated to N bits
    6767              :    when only N bits of the output are needed.  This is only true if bit N+1
    6768              :    of the inputs has no effect on the low N bits of the result.  */
    6769              : 
    6770              : static bool
    6771     15247902 : vect_truncatable_operation_p (tree_code code)
    6772              : {
    6773     15247902 :   switch (code)
    6774              :     {
    6775              :     case NEGATE_EXPR:
    6776              :     case PLUS_EXPR:
    6777              :     case MINUS_EXPR:
    6778              :     case MULT_EXPR:
    6779              :     case BIT_NOT_EXPR:
    6780              :     case BIT_AND_EXPR:
    6781              :     case BIT_IOR_EXPR:
    6782              :     case BIT_XOR_EXPR:
    6783              :     case COND_EXPR:
    6784              :       return true;
    6785              : 
    6786      5836083 :     default:
    6787      5836083 :       return false;
    6788              :     }
    6789              : }
    6790              : 
    6791              : /* Record that STMT_INFO could be changed from operating on TYPE to
    6792              :    operating on a type with the precision and sign given by PRECISION
    6793              :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6794              :    it might not be a whole number of bytes.  */
    6795              : 
    6796              : static void
    6797      2349047 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6798              :                          unsigned int precision, signop sign)
    6799              : {
    6800              :   /* Round the precision up to a whole number of bytes.  */
    6801      2349047 :   precision = vect_element_precision (precision);
    6802      2349047 :   if (precision < TYPE_PRECISION (type)
    6803      2349047 :       && (!stmt_info->operation_precision
    6804        44633 :           || stmt_info->operation_precision > precision))
    6805              :     {
    6806      1481634 :       stmt_info->operation_precision = precision;
    6807      1481634 :       stmt_info->operation_sign = sign;
    6808              :     }
    6809      2349047 : }
    6810              : 
    6811              : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6812              :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6813              :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    6814              : 
    6815              : static void
    6816     11080661 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6817              :                               unsigned int min_input_precision)
    6818              : {
    6819              :   /* This operation in isolation only requires the inputs to have
    6820              :      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
    6821              :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6822              :      as a whole.  E.g. consider something like:
    6823              : 
    6824              :          unsigned short *x, *y;
    6825              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6826              : 
    6827              :      The right shift can be done on unsigned chars, and only requires the
    6828              :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6829              :      approach would mean turning a natural chain of single-vector unsigned
    6830              :      short operations into one that truncates "*x" and then extends
    6831              :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6832              :      operation and one vector for each unsigned char operation.
    6833              :      This would be a significant pessimization.
    6834              : 
    6835              :      Instead only propagate the maximum of this precision and the precision
    6836              :      required by the users of the result.  This means that we don't pessimize
    6837              :      the case above but continue to optimize things like:
    6838              : 
    6839              :          unsigned char *y;
    6840              :          unsigned short *x;
    6841              :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6842              : 
    6843              :      Here we would truncate two vectors of *x to a single vector of
    6844              :      unsigned chars and use single-vector unsigned char operations for
    6845              :      everything else, rather than doing two unsigned short copies of
    6846              :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6847     11080661 :   min_input_precision = MAX (min_input_precision,
    6848              :                              stmt_info->min_output_precision);
    6849              : 
    6850     11080661 :   if (min_input_precision < TYPE_PRECISION (type)
    6851     11080661 :       && (!stmt_info->min_input_precision
    6852        64850 :           || stmt_info->min_input_precision > min_input_precision))
    6853       546551 :     stmt_info->min_input_precision = min_input_precision;
    6854     11080661 : }
    6855              : 
    6856              : /* Subroutine of vect_determine_min_output_precision.  Return true if
    6857              :    we can calculate a reduced number of output bits for STMT_INFO,
    6858              :    whose result is LHS.  */
    6859              : 
    6860              : static bool
    6861     14248525 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6862              :                                        stmt_vec_info stmt_info, tree lhs)
    6863              : {
    6864              :   /* Take the maximum precision required by users of the result.  */
    6865     14248525 :   unsigned int precision = 0;
    6866     14248525 :   imm_use_iterator iter;
    6867     14248525 :   use_operand_p use;
    6868     29290254 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6869              :     {
    6870     14785848 :       gimple *use_stmt = USE_STMT (use);
    6871     14785848 :       if (is_gimple_debug (use_stmt))
    6872       532745 :         continue;
    6873     14253103 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6874     14253103 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6875              :         return false;
    6876              :       /* The input precision recorded for COND_EXPRs applies only to the
    6877              :          "then" and "else" values.  */
    6878       260984 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6879       225471 :       if (assign
    6880       225471 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6881          525 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6882          525 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6883              :         return false;
    6884       794830 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6885     13992644 :     }
    6886              : 
    6887       255881 :   if (dump_enabled_p ())
    6888         5778 :     dump_printf_loc (MSG_NOTE, vect_location,
    6889              :                      "only the low %d bits of %T are significant\n",
    6890              :                      precision, lhs);
    6891       255881 :   stmt_info->min_output_precision = precision;
    6892       255881 :   return true;
    6893              : }
    6894              : 
    6895              : /* Calculate min_output_precision for STMT_INFO.  */
    6896              : 
    6897              : static void
    6898     36734925 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6899              : {
    6900              :   /* We're only interested in statements with a narrowable result.  */
    6901     36734925 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6902     36734925 :   if (!lhs
    6903     28488619 :       || TREE_CODE (lhs) != SSA_NAME
    6904     60725541 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6905              :     return;
    6906              : 
    6907     14248525 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6908     13992644 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6909              : }
    6910              : 
    6911              : /* Use range information to decide whether STMT (described by STMT_INFO)
    6912              :    could be done in a narrower type.  This is effectively a forward
    6913              :    propagation, since it uses context-independent information that applies
    6914              :    to all users of an SSA name.  */
    6915              : 
    6916              : static void
    6917     20325369 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6918              : {
    6919     20325369 :   tree lhs = gimple_assign_lhs (stmt);
    6920     20325369 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6921     18084521 :     return;
    6922              : 
    6923     16109876 :   tree type = TREE_TYPE (lhs);
    6924     16109876 :   if (!vect_narrowable_type_p (type))
    6925              :     return;
    6926              : 
    6927              :   /* First see whether we have any useful range information for the result.  */
    6928     10919048 :   unsigned int precision = TYPE_PRECISION (type);
    6929     10919048 :   signop sign = TYPE_SIGN (type);
    6930     10919048 :   wide_int min_value, max_value;
    6931     10919048 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6932              :     return;
    6933              : 
    6934      5309297 :   tree_code code = gimple_assign_rhs_code (stmt);
    6935      5309297 :   unsigned int nops = gimple_num_ops (stmt);
    6936              : 
    6937      5309297 :   if (!vect_truncatable_operation_p (code))
    6938              :     {
    6939              :       /* Handle operations that can be computed in type T if all inputs
    6940              :          and outputs can be represented in type T.  Also handle left and
    6941              :          right shifts, where (in addition) the maximum shift amount must
    6942              :          be less than the number of bits in T.  */
    6943      1963053 :       bool is_shift;
    6944      1963053 :       switch (code)
    6945              :         {
    6946              :         case LSHIFT_EXPR:
    6947              :         case RSHIFT_EXPR:
    6948              :           is_shift = true;
    6949              :           break;
    6950              : 
    6951       265085 :         case ABS_EXPR:
    6952       265085 :         case MIN_EXPR:
    6953       265085 :         case MAX_EXPR:
    6954       265085 :         case TRUNC_DIV_EXPR:
    6955       265085 :         case CEIL_DIV_EXPR:
    6956       265085 :         case FLOOR_DIV_EXPR:
    6957       265085 :         case ROUND_DIV_EXPR:
    6958       265085 :         case EXACT_DIV_EXPR:
    6959              :           /* Modulus is excluded because it is typically calculated by doing
    6960              :              a division, for which minimum signed / -1 isn't representable in
    6961              :              the original signed type.  We could take the division range into
    6962              :              account instead, if handling modulus ever becomes important.  */
    6963       265085 :           is_shift = false;
    6964       265085 :           break;
    6965              : 
    6966              :         default:
    6967              :           return;
    6968              :         }
    6969      1307869 :       for (unsigned int i = 1; i < nops; ++i)
    6970              :         {
    6971      1008474 :           tree op = gimple_op (stmt, i);
    6972      1008474 :           wide_int op_min_value, op_max_value;
    6973      1008474 :           if (TREE_CODE (op) == INTEGER_CST)
    6974              :             {
    6975       303968 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6976       303968 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6977              :             }
    6978       704506 :           else if (TREE_CODE (op) == SSA_NAME)
    6979              :             {
    6980       704506 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    6981              :                 return;
    6982              :             }
    6983              :           else
    6984              :             return;
    6985              : 
    6986       660896 :           if (is_shift && i == 2)
    6987              :             {
    6988              :               /* There needs to be one more bit than the maximum shift amount.
    6989              : 
    6990              :                  If the maximum shift amount is already 1 less than PRECISION
    6991              :                  then we can't narrow the shift further.  Dealing with that
    6992              :                  case first ensures that we can safely use an unsigned range
    6993              :                  below.
    6994              : 
    6995              :                  op_min_value isn't relevant, since shifts by negative amounts
    6996              :                  are UB.  */
    6997       200027 :               if (wi::geu_p (op_max_value, precision - 1))
    6998              :                 return;
    6999       181827 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    7000              : 
    7001              :               /* As explained below, we can convert a signed shift into an
    7002              :                  unsigned shift if the sign bit is always clear.  At this
    7003              :                  point we've already processed the ranges of the output and
    7004              :                  the first input.  */
    7005       181827 :               auto op_sign = sign;
    7006       181827 :               if (sign == SIGNED && !wi::neg_p (min_value))
    7007              :                 op_sign = UNSIGNED;
    7008       363654 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    7009       181827 :                                              precision, op_sign);
    7010       363654 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    7011       181827 :                                              precision, op_sign);
    7012              :             }
    7013       642696 :           min_value = wi::min (min_value, op_min_value, sign);
    7014       642696 :           max_value = wi::max (max_value, op_max_value, sign);
    7015      1008474 :         }
    7016              :     }
    7017              : 
    7018              :   /* Try to switch signed types for unsigned types if we can.
    7019              :      This is better for two reasons.  First, unsigned ops tend
    7020              :      to be cheaper than signed ops.  Second, it means that we can
    7021              :      handle things like:
    7022              : 
    7023              :         signed char c;
    7024              :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    7025              : 
    7026              :      as:
    7027              : 
    7028              :         signed char c;
    7029              :         unsigned short res_1 = (unsigned short) c & 0xff00;
    7030              :         int res = (int) res_1;
    7031              : 
    7032              :      where the intermediate result res_1 has unsigned rather than
    7033              :      signed type.  */
    7034      3645639 :   if (sign == SIGNED && !wi::neg_p (min_value))
    7035              :     sign = UNSIGNED;
    7036              : 
    7037              :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    7038      3645639 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    7039      3645639 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    7040      3645639 :   unsigned int value_precision = MAX (precision1, precision2);
    7041      3645639 :   if (value_precision >= precision)
    7042              :     return;
    7043              : 
    7044      2240848 :   if (dump_enabled_p ())
    7045       107156 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7046              :                      " without loss of precision: %G",
    7047              :                      sign == SIGNED ? "signed" : "unsigned",
    7048              :                      value_precision, (gimple *) stmt);
    7049              : 
    7050      2240848 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    7051      2240848 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    7052     10919048 : }
    7053              : 
    7054              : /* Use information about the users of STMT's result to decide whether
    7055              :    STMT (described by STMT_INFO) could be done in a narrower type.
    7056              :    This is effectively a backward propagation.  */
    7057              : 
    7058              : static void
    7059     20325369 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    7060              : {
    7061     20325369 :   tree_code code = gimple_assign_rhs_code (stmt);
    7062     20325369 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    7063     20325369 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    7064     20325369 :   if (!vect_narrowable_type_p (type))
    7065     11485556 :     return;
    7066              : 
    7067     12747484 :   unsigned int precision = TYPE_PRECISION (type);
    7068     12747484 :   unsigned int operation_precision, min_input_precision;
    7069     12747484 :   switch (code)
    7070              :     {
    7071      2340834 :     CASE_CONVERT:
    7072              :       /* Only the bits that contribute to the output matter.  Don't change
    7073              :          the precision of the operation itself.  */
    7074      2340834 :       operation_precision = precision;
    7075      2340834 :       min_input_precision = stmt_info->min_output_precision;
    7076      2340834 :       break;
    7077              : 
    7078       468045 :     case LSHIFT_EXPR:
    7079       468045 :     case RSHIFT_EXPR:
    7080       468045 :       {
    7081       468045 :         tree shift = gimple_assign_rhs2 (stmt);
    7082       468045 :         unsigned int min_const_shift, max_const_shift;
    7083       468045 :         wide_int min_shift, max_shift;
    7084       468045 :         if (TREE_CODE (shift) == SSA_NAME
    7085        99185 :             && vect_get_range_info (shift, &min_shift, &max_shift)
    7086        75395 :             && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
    7087       540676 :             && wi::lt_p (max_shift, TYPE_PRECISION (type),
    7088        72631 :                          TYPE_SIGN (TREE_TYPE (shift))))
    7089              :           {
    7090        64652 :             min_const_shift = min_shift.to_uhwi ();
    7091        64652 :             max_const_shift = max_shift.to_uhwi ();
    7092              :           }
    7093       403393 :         else if (TREE_CODE (shift) == INTEGER_CST
    7094       772253 :                  && wi::ltu_p (wi::to_widest (shift), precision))
    7095       368752 :           min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
    7096              :         else
    7097        34641 :           return;
    7098       433404 :         if (code == LSHIFT_EXPR)
    7099              :           {
    7100              :             /* Avoid creating an undefined shift.
    7101              : 
    7102              :                ??? We could instead use min_output_precision as-is and
    7103              :                optimize out-of-range shifts to zero.  However, only
    7104              :                degenerate testcases shift away all their useful input data,
    7105              :                and it isn't natural to drop input operations in the middle
    7106              :                of vectorization.  This sort of thing should really be
    7107              :                handled before vectorization.  */
    7108       101707 :             operation_precision = MAX (stmt_info->min_output_precision,
    7109              :                                        max_const_shift + 1);
    7110              :             /* We need CONST_SHIFT fewer bits of the input.  */
    7111       101707 :             min_input_precision = (MAX (operation_precision, max_const_shift)
    7112              :                                    - min_const_shift);
    7113              :           }
    7114              :         else
    7115              :           {
    7116              :             /* We need CONST_SHIFT extra bits to do the operation.  */
    7117       331697 :             operation_precision = (stmt_info->min_output_precision
    7118              :                                    + max_const_shift);
    7119       331697 :             min_input_precision = operation_precision;
    7120              :           }
    7121       433404 :         break;
    7122       468045 :       }
    7123              : 
    7124      9938605 :     default:
    7125      9938605 :       if (vect_truncatable_operation_p (code))
    7126              :         {
    7127              :           /* Input bit N has no effect on output bits N-1 and lower.  */
    7128      6065575 :           operation_precision = stmt_info->min_output_precision;
    7129      6065575 :           min_input_precision = operation_precision;
    7130      6065575 :           break;
    7131              :         }
    7132              :       return;
    7133              :     }
    7134              : 
    7135      8839813 :   if (operation_precision < precision)
    7136              :     {
    7137       108199 :       if (dump_enabled_p ())
    7138         2753 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    7139              :                          " without affecting users: %G",
    7140         2753 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    7141              :                          operation_precision, (gimple *) stmt);
    7142       216398 :       vect_set_operation_type (stmt_info, type, operation_precision,
    7143       108199 :                                TYPE_SIGN (type));
    7144              :     }
    7145      8839813 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    7146              : }
    7147              : 
    7148              : /* Return true if the statement described by STMT_INFO sets a boolean
    7149              :    SSA_NAME and if we know how to vectorize this kind of statement using
    7150              :    vector mask types.  */
    7151              : 
    7152              : static bool
    7153     37611843 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    7154              : {
    7155     37611843 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    7156     37611843 :   tree_code code = ERROR_MARK;
    7157     37611843 :   gassign *assign = NULL;
    7158     37611843 :   gcond *cond = NULL;
    7159              : 
    7160     37611843 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    7161     20950220 :     code = gimple_assign_rhs_code (assign);
    7162     16661623 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    7163              :     {
    7164      5226282 :       lhs = gimple_cond_lhs (cond);
    7165      5226282 :       code = gimple_cond_code (cond);
    7166              :     }
    7167              : 
    7168     37611843 :   if (!lhs
    7169     34512908 :       || TREE_CODE (lhs) != SSA_NAME
    7170     67596871 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    7171              :     return false;
    7172              : 
    7173      2070100 :   if (code != ERROR_MARK)
    7174              :     {
    7175      1812025 :       switch (code)
    7176              :         {
    7177              :         CASE_CONVERT:
    7178              :         case SSA_NAME:
    7179              :         case BIT_NOT_EXPR:
    7180              :         case BIT_IOR_EXPR:
    7181              :         case BIT_XOR_EXPR:
    7182              :         case BIT_AND_EXPR:
    7183              :           return true;
    7184              : 
    7185      1422898 :         default:
    7186      1422898 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    7187              :         }
    7188              :     }
    7189       258075 :   else if (is_a <gphi *> (stmt_info->stmt))
    7190       150758 :     return true;
    7191              :   return false;
    7192              : }
    7193              : 
    7194              : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    7195              :    a vector mask type instead of a normal vector type.  Record the
    7196              :    result in STMT_INFO->mask_precision.  Returns true when the
    7197              :    precision changed.  */
    7198              : 
    7199              : static bool
    7200     37611843 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    7201              : {
    7202     37611843 :   if (!possible_vector_mask_operation_p (stmt_info))
    7203              :     return false;
    7204              : 
    7205              :   /* If at least one boolean input uses a vector mask type,
    7206              :      pick the mask type with the narrowest elements.
    7207              : 
    7208              :      ??? This is the traditional behavior.  It should always produce
    7209              :      the smallest number of operations, but isn't necessarily the
    7210              :      optimal choice.  For example, if we have:
    7211              : 
    7212              :        a = b & c
    7213              : 
    7214              :      where:
    7215              : 
    7216              :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    7217              :        - b also uses M16
    7218              :        - c uses a mask type for 8-bit elements (M8)
    7219              : 
    7220              :      then picking M8 gives:
    7221              : 
    7222              :        - 1 M16->M8 pack for b
    7223              :        - 1 M8 AND for a
    7224              :        - 2 M8->M16 unpacks for the user of a
    7225              : 
    7226              :      whereas picking M16 would have given:
    7227              : 
    7228              :        - 2 M8->M16 unpacks for c
    7229              :        - 2 M16 ANDs for a
    7230              : 
    7231              :      The number of operations are equal, but M16 would have given
    7232              :      a shorter dependency chain and allowed more ILP.  */
    7233      1913344 :   unsigned int precision = ~0U;
    7234      1913344 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    7235              : 
    7236              :   /* If the statement compares two values that shouldn't use vector masks,
    7237              :      try comparing the values as normal scalars instead.  */
    7238      1913344 :   tree_code code = ERROR_MARK;
    7239      1913344 :   tree op0_type;
    7240      1913344 :   unsigned int nops = -1;
    7241      1913344 :   unsigned int ops_start = 0;
    7242              : 
    7243      1913344 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    7244              :     {
    7245      1227043 :       code = gimple_assign_rhs_code (assign);
    7246      1227043 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    7247      1227043 :       nops = gimple_num_ops (assign);
    7248      1227043 :       ops_start = 1;
    7249              :     }
    7250       686301 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    7251              :     {
    7252       535543 :       code = gimple_cond_code (cond);
    7253       535543 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    7254       535543 :       nops = 2;
    7255       535543 :       ops_start = 0;
    7256              :     }
    7257              : 
    7258      1762586 :   if (code != ERROR_MARK)
    7259              :     {
    7260      5245927 :       for (unsigned int i = ops_start; i < nops; ++i)
    7261              :         {
    7262      3483341 :           tree rhs = gimple_op (stmt, i);
    7263      3483341 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    7264      1686620 :             continue;
    7265              : 
    7266      1796721 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7267      1796721 :           if (!def_stmt_info)
    7268              :             /* Don't let external or constant operands influence the choice.
    7269              :                We can convert them to whichever vector type we pick.  */
    7270       549410 :             continue;
    7271              : 
    7272      1247311 :           if (def_stmt_info->mask_precision)
    7273              :             {
    7274      1030895 :               if (precision > def_stmt_info->mask_precision)
    7275      3483341 :                 precision = def_stmt_info->mask_precision;
    7276              :             }
    7277              :         }
    7278              : 
    7279      1762586 :       if (precision == ~0U
    7280      1432424 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    7281              :         {
    7282      1236870 :           scalar_mode mode;
    7283      1236870 :           tree vectype, mask_type;
    7284      1236870 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    7285              :               /* Do not allow this to set vinfo->vector_mode, this might
    7286              :                  disrupt the result for the next iteration.  */
    7287      1236870 :               && (vectype = get_related_vectype_for_scalar_type
    7288      1480007 :                                                 (vinfo->vector_mode, op0_type))
    7289      1078829 :               && (mask_type = truth_type_for (vectype))
    7290      1078829 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    7291      1671384 :             precision = GET_MODE_BITSIZE (mode);
    7292              :         }
    7293              :     }
    7294              :   else
    7295              :     {
    7296       150758 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    7297       620065 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    7298              :         {
    7299       469307 :           tree rhs = gimple_phi_arg_def (phi, i);
    7300              : 
    7301       469307 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    7302       469307 :           if (!def_stmt_info)
    7303              :             /* Don't let external or constant operands influence the choice.
    7304              :                We can convert them to whichever vector type we pick.  */
    7305       307077 :             continue;
    7306              : 
    7307       162230 :           if (def_stmt_info->mask_precision)
    7308              :             {
    7309       137603 :               if (precision > def_stmt_info->mask_precision)
    7310       469307 :                 precision = def_stmt_info->mask_precision;
    7311              :             }
    7312              :         }
    7313              :     }
    7314              : 
    7315      1913344 :   if (stmt_info->mask_precision != precision)
    7316              :     {
    7317      1805846 :       if (dump_enabled_p ())
    7318              :         {
    7319         7809 :           if (precision == ~0U)
    7320         1858 :             dump_printf_loc (MSG_NOTE, vect_location,
    7321              :                              "using normal nonmask vectors for %G",
    7322              :                              stmt_info->stmt);
    7323              :           else
    7324         5951 :             dump_printf_loc (MSG_NOTE, vect_location,
    7325              :                              "using boolean precision %d for %G",
    7326              :                              precision, stmt_info->stmt);
    7327              :         }
    7328              : 
    7329              :       /* ???  We'd like to assert stmt_info->mask_precision == 0
    7330              :          || stmt_info->mask_precision > precision, thus that we only
    7331              :          decrease mask precisions throughout iteration, but the
    7332              :          tcc_comparison handling above means for comparisons of bools
    7333              :          we start with 8 but might increase in case the bools get mask
    7334              :          precision on their own.  */
    7335      1805846 :       stmt_info->mask_precision = precision;
    7336      1805846 :       return true;
    7337              :     }
    7338              :   return false;
    7339              : }
    7340              : 
    7341              : /* Handle vect_determine_precisions for STMT_INFO, given that we
    7342              :    have already done so for the users of its result.  */
    7343              : 
    7344              : void
    7345     36734925 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    7346              : {
    7347     36734925 :   vect_determine_min_output_precision (vinfo, stmt_info);
    7348     36734925 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    7349              :     {
    7350     20325369 :       vect_determine_precisions_from_range (stmt_info, stmt);
    7351     20325369 :       vect_determine_precisions_from_users (stmt_info, stmt);
    7352              :     }
    7353     36734925 : }
    7354              : 
    7355              : /* Walk backwards through the vectorizable region to determine the
    7356              :    values of these fields:
    7357              : 
    7358              :    - min_output_precision
    7359              :    - min_input_precision
    7360              :    - operation_precision
    7361              :    - operation_sign.  */
    7362              : 
    7363              : void
    7364       985415 : vect_determine_precisions (vec_info *vinfo)
    7365              : {
    7366       985415 :   basic_block *bbs = vinfo->bbs;
    7367       985415 :   unsigned int nbbs = vinfo->nbbs;
    7368              : 
    7369      1004156 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    7370              : 
    7371              :   /* For mask precisions we have to iterate since otherwise we do not
    7372              :      get reduction PHI precision correct.  For now do this only for
    7373              :      loop vectorization.  */
    7374      1041920 :   bool changed;
    7375      1041920 :   do
    7376              :     {
    7377      1041920 :       changed = false;
    7378     12606106 :       for (unsigned int i = 0; i < nbbs; i++)
    7379              :         {
    7380     11564186 :           basic_block bb = bbs[i];
    7381     11564186 :           for (auto gsi = gsi_start_phis (bb);
    7382     18567217 :                !gsi_end_p (gsi); gsi_next (&gsi))
    7383              :             {
    7384      7003031 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7385      7003031 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7386      6828580 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7387              :             }
    7388    118975001 :           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7389              :             {
    7390     95846629 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7391     95846629 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7392     30783263 :                 changed |= vect_determine_mask_precision (vinfo, stmt_info);
    7393              :             }
    7394              :         }
    7395              :     }
    7396      2027335 :   while (changed && is_a <loop_vec_info> (vinfo));
    7397              : 
    7398     12415923 :   for (unsigned int i = 0; i < nbbs; i++)
    7399              :     {
    7400     11430508 :       basic_block bb = bbs[nbbs - i - 1];
    7401    211704786 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    7402              :         {
    7403     94421885 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    7404     94421885 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7405     30070654 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7406              :         }
    7407     18269230 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    7408              :         {
    7409      6838722 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    7410      6838722 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    7411      6664271 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    7412              :         }
    7413              :     }
    7414       985415 : }
    7415              : 
    7416              : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    7417              : 
    7418              : struct vect_recog_func
    7419              : {
    7420              :   vect_recog_func_ptr fn;
    7421              :   const char *name;
    7422              : };
    7423              : 
    7424              : /* Note that ordering matters - the first pattern matching on a stmt is
    7425              :    taken which means usually the more complex one needs to preceed the
    7426              :    less comples onex (widen_sum only after dot_prod or sad for example).  */
    7427              : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    7428              :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    7429              :   { vect_recog_bit_insert_pattern, "bit_insert" },
    7430              :   { vect_recog_abd_pattern, "abd" },
    7431              :   { vect_recog_over_widening_pattern, "over_widening" },
    7432              :   /* Must come after over_widening, which narrows the shift as much as
    7433              :      possible beforehand.  */
    7434              :   { vect_recog_average_pattern, "average" },
    7435              :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    7436              :   { vect_recog_mulhs_pattern, "mult_high" },
    7437              :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    7438              :   { vect_recog_widen_mult_pattern, "widen_mult" },
    7439              :   { vect_recog_dot_prod_pattern, "dot_prod" },
    7440              :   { vect_recog_sad_pattern, "sad" },
    7441              :   { vect_recog_widen_sum_pattern, "widen_sum" },
    7442              :   { vect_recog_pow_pattern, "pow" },
    7443              :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    7444              :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    7445              :   { vect_recog_widen_shift_pattern, "widen_shift" },
    7446              :   { vect_recog_rotate_pattern, "rotate" },
    7447              :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    7448              :   { vect_recog_divmod_pattern, "divmod" },
    7449              :   { vect_recog_mod_var_pattern, "modvar" },
    7450              :   { vect_recog_mult_pattern, "mult" },
    7451              :   { vect_recog_sat_add_pattern, "sat_add" },
    7452              :   { vect_recog_sat_sub_pattern, "sat_sub" },
    7453              :   { vect_recog_sat_trunc_pattern, "sat_trunc" },
    7454              :   { vect_recog_gcond_pattern, "gcond" },
    7455              :   { vect_recog_bool_pattern, "bool" },
    7456              :   /* This must come before mask conversion, and includes the parts
    7457              :      of mask conversion that are needed for gather and scatter
    7458              :      internal functions.  */
    7459              :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    7460              :   { vect_recog_cond_store_pattern, "cond_store" },
    7461              :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    7462              :   { vect_recog_widen_plus_pattern, "widen_plus" },
    7463              :   { vect_recog_widen_minus_pattern, "widen_minus" },
    7464              :   { vect_recog_widen_abd_pattern, "widen_abd" },
    7465              :   /* These must come after the double widening ones.  */
    7466              : };
    7467              : 
    7468              : /* Mark statements that are involved in a pattern.  */
    7469              : 
    7470              : void
    7471       964155 : vect_mark_pattern_stmts (vec_info *vinfo,
    7472              :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    7473              :                          tree pattern_vectype)
    7474              : {
    7475       964155 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    7476       964155 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7477              : 
    7478       964155 :   gimple *orig_pattern_stmt = NULL;
    7479       964155 :   if (is_pattern_stmt_p (orig_stmt_info))
    7480              :     {
    7481              :       /* We're replacing a statement in an existing pattern definition
    7482              :          sequence.  */
    7483        10224 :       orig_pattern_stmt = orig_stmt_info->stmt;
    7484        10224 :       if (dump_enabled_p ())
    7485          664 :         dump_printf_loc (MSG_NOTE, vect_location,
    7486              :                          "replacing earlier pattern %G", orig_pattern_stmt);
    7487              : 
    7488              :       /* To keep the book-keeping simple, just swap the lhs of the
    7489              :          old and new statements, so that the old one has a valid but
    7490              :          unused lhs.  */
    7491        10224 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    7492        10224 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    7493        10224 :       gimple_set_lhs (pattern_stmt, old_lhs);
    7494              : 
    7495        10224 :       if (dump_enabled_p ())
    7496          664 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    7497              : 
    7498              :       /* Switch to the statement that ORIG replaces.  */
    7499        10224 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    7500              : 
    7501              :       /* We shouldn't be replacing the main pattern statement.  */
    7502        10224 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    7503              :                   != orig_pattern_stmt);
    7504              :     }
    7505              : 
    7506       964155 :   if (def_seq)
    7507              :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    7508      2137208 :          !gsi_end_p (si); gsi_next (&si))
    7509              :       {
    7510      1288883 :         if (dump_enabled_p ())
    7511        24004 :           dump_printf_loc (MSG_NOTE, vect_location,
    7512              :                            "extra pattern stmt: %G", gsi_stmt (si));
    7513      1288883 :         stmt_vec_info pattern_stmt_info
    7514      1288883 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    7515              :                                     orig_stmt_info, pattern_vectype);
    7516              :         /* Stmts in the def sequence are not vectorizable cycle or
    7517              :            induction defs, instead they should all be vect_internal_def
    7518              :            feeding the main pattern stmt which retains this def type.  */
    7519      1288883 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    7520              :       }
    7521              : 
    7522       964155 :   if (orig_pattern_stmt)
    7523              :     {
    7524        10224 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    7525              :                               orig_stmt_info, pattern_vectype);
    7526              : 
    7527              :       /* Insert all the new pattern statements before the original one.  */
    7528        10224 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7529        10224 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    7530              :                                                orig_def_seq);
    7531        10224 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    7532        10224 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    7533              : 
    7534              :       /* Remove the pattern statement that this new pattern replaces.  */
    7535        10224 :       gsi_remove (&gsi, false);
    7536              :     }
    7537              :   else
    7538       953931 :     vect_set_pattern_stmt (vinfo,
    7539              :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    7540              : 
    7541              :   /* For any conditionals mark them as vect_condition_def.  */
    7542       964155 :   if (is_a <gcond *> (pattern_stmt))
    7543       359198 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    7544              : 
    7545              :   /* Transfer reduction path info to the pattern.  */
    7546       964155 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    7547              :     {
    7548        13812 :       gimple_match_op op;
    7549        13812 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    7550            0 :         gcc_unreachable ();
    7551        13812 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    7552              :       /* Search the pattern def sequence and the main pattern stmt.  Note
    7553              :          we may have inserted all into a containing pattern def sequence
    7554              :          so the following is a bit awkward.  */
    7555        13812 :       gimple_stmt_iterator si;
    7556        13812 :       gimple *s;
    7557        13812 :       if (def_seq)
    7558              :         {
    7559        13076 :           si = gsi_start (def_seq);
    7560        13076 :           s = gsi_stmt (si);
    7561        13076 :           gsi_next (&si);
    7562              :         }
    7563              :       else
    7564              :         {
    7565              :           si = gsi_none ();
    7566              :           s = pattern_stmt;
    7567              :         }
    7568        29689 :       do
    7569              :         {
    7570        29689 :           bool found = false;
    7571        29689 :           if (gimple_extract_op (s, &op))
    7572              :             {
    7573        73612 :               for (unsigned i = 0; i < op.num_ops; ++i)
    7574        57735 :                 if (op.ops[i] == lookfor)
    7575              :                   {
    7576        13812 :                     STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7577        13812 :                     lookfor = gimple_get_lhs (s);
    7578        13812 :                     found = true;
    7579        13812 :                     break;
    7580              :                   }
    7581              :               /* Try harder to find a mid-entry into an earlier pattern
    7582              :                  sequence.  Likewise an entry to a stmt skipping a conversion
    7583              :                  on an input.  This means that the initial 'lookfor' was
    7584              :                  bogus.  */
    7585        13812 :               if (!found)
    7586              :                 {
    7587        34747 :                   for (unsigned i = 0; i < op.num_ops; ++i)
    7588        18870 :                     if (TREE_CODE (op.ops[i]) == SSA_NAME)
    7589        15877 :                       if (auto def = vinfo->lookup_def (op.ops[i]))
    7590        15686 :                         if (vect_is_reduction (def)
    7591        15686 :                             || (is_a <gphi *> (def->stmt)
    7592            0 :                                 && STMT_VINFO_REDUC_DEF (def) != NULL))
    7593              :                           {
    7594            0 :                             STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7595            0 :                             lookfor = gimple_get_lhs (s);
    7596            0 :                             found = true;
    7597            0 :                             break;
    7598              :                           }
    7599              :                 }
    7600              :             }
    7601        29689 :           if (s == pattern_stmt)
    7602              :             {
    7603        13812 :               if (!found && dump_enabled_p ())
    7604            0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7605              :                                  "failed to update reduction index.\n");
    7606        13812 :               break;
    7607              :             }
    7608        15877 :           if (gsi_end_p (si))
    7609              :             s = pattern_stmt;
    7610              :           else
    7611              :             {
    7612         2801 :               s = gsi_stmt (si);
    7613         2801 :               if (s == pattern_stmt)
    7614              :                 /* Found the end inside a bigger pattern def seq.  */
    7615              :                 si = gsi_none ();
    7616              :               else
    7617         2801 :                 gsi_next (&si);
    7618              :             }
    7619              :         } while (1);
    7620              :     }
    7621       964155 : }
    7622              : 
    7623              : /* Function vect_pattern_recog_1
    7624              : 
    7625              :    Input:
    7626              :    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
    7627              :         computation pattern.
    7628              :    STMT_INFO: A stmt from which the pattern search should start.
    7629              : 
    7630              :    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
    7631              :    a sequence of statements that has the same functionality and can be
    7632              :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7633              :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7634              :    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
    7635              :    statement, having first checked that the target supports the new operation
    7636              :    in that type.
    7637              : 
    7638              :    This function also does some bookkeeping, as explained in the documentation
    7639              :    for vect_recog_pattern.  */
    7640              : 
    7641              : static void
    7642    979595703 : vect_pattern_recog_1 (vec_info *vinfo,
    7643              :                       const vect_recog_func &recog_func, stmt_vec_info stmt_info)
    7644              : {
    7645    979595703 :   gimple *pattern_stmt;
    7646    979595703 :   tree pattern_vectype;
    7647              : 
    7648              :   /* If this statement has already been replaced with pattern statements,
    7649              :      leave the original statement alone, since the first match wins.
    7650              :      Instead try to match against the definition statements that feed
    7651              :      the main pattern statement.  */
    7652    979595703 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7653              :     {
    7654     12237581 :       gimple_stmt_iterator gsi;
    7655     12237581 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7656     29572356 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7657     17334775 :         vect_pattern_recog_1 (vinfo, recog_func,
    7658              :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7659              :       return;
    7660              :     }
    7661              : 
    7662    967358122 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7663    967358122 :   pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
    7664    967358122 :   if (!pattern_stmt)
    7665              :     {
    7666              :       /* Clear any half-formed pattern definition sequence.  */
    7667    966393967 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7668    966393967 :       return;
    7669              :     }
    7670              : 
    7671              :   /* Found a vectorizable pattern.  */
    7672       964155 :   if (dump_enabled_p ())
    7673        18187 :     dump_printf_loc (MSG_NOTE, vect_location,
    7674              :                      "%s pattern recognized: %G",
    7675        18187 :                      recog_func.name, pattern_stmt);
    7676              : 
    7677              :   /* Mark the stmts that are involved in the pattern. */
    7678       964155 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7679              : }
    7680              : 
    7681              : 
    7682              : /* Function vect_pattern_recog
    7683              : 
    7684              :    Input:
    7685              :    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
    7686              :         computation idioms.
    7687              : 
    7688              :    Output - for each computation idiom that is detected we create a new stmt
    7689              :         that provides the same functionality and that can be vectorized.  We
    7690              :         also record some information in the struct_stmt_info of the relevant
    7691              :         stmts, as explained below:
    7692              : 
    7693              :    At the entry to this function we have the following stmts, with the
    7694              :    following initial value in the STMT_VINFO fields:
    7695              : 
    7696              :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7697              :          S1: a_i = ....                 -       -               -
    7698              :          S2: a_2 = ..use(a_i)..         -       -               -
    7699              :          S3: a_1 = ..use(a_2)..         -       -               -
    7700              :          S4: a_0 = ..use(a_1)..         -       -               -
    7701              :          S5: ... = ..use(a_0)..         -       -               -
    7702              : 
    7703              :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7704              :    represented by a single stmt.  We then:
    7705              :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7706              :      inserted into the code)
    7707              :    - fill in the STMT_VINFO fields as follows:
    7708              : 
    7709              :                                   in_pattern_p  related_stmt    vec_stmt
    7710              :          S1: a_i = ....                 -       -               -
    7711              :          S2: a_2 = ..use(a_i)..         -       -               -
    7712              :          S3: a_1 = ..use(a_2)..         -       -               -
    7713              :          S4: a_0 = ..use(a_1)..         true    S6              -
    7714              :           '---> S6: a_new = ....        -       S4              -
    7715              :          S5: ... = ..use(a_0)..         -       -               -
    7716              : 
    7717              :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7718              :    to each other through the RELATED_STMT field).
    7719              : 
    7720              :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7721              :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7722              :    remain irrelevant unless used by stmts other than S4.
    7723              : 
    7724              :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7725              :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7726              :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7727              :    S4 will be skipped, and S5 will be vectorized as usual:
    7728              : 
    7729              :                                   in_pattern_p  related_stmt    vec_stmt
    7730              :          S1: a_i = ....                 -       -               -
    7731              :          S2: a_2 = ..use(a_i)..         -       -               -
    7732              :          S3: a_1 = ..use(a_2)..         -       -               -
    7733              :        > VS6: va_new = ....             -       -               -
    7734              :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7735              :           '---> S6: a_new = ....        -       S4              VS6
    7736              :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7737              :          S5: ... = ..use(a_0)..         -       -               -
    7738              : 
    7739              :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7740              :    elsewhere), and we'll end up with:
    7741              : 
    7742              :         VS6: va_new = ....
    7743              :         VS5: ... = ..vuse(va_new)..
    7744              : 
    7745              :    In case of more than one pattern statements, e.g., widen-mult with
    7746              :    intermediate type:
    7747              : 
    7748              :      S1  a_t = ;
    7749              :      S2  a_T = (TYPE) a_t;
    7750              :            '--> S3: a_it = (interm_type) a_t;
    7751              :      S4  prod_T = a_T * CONST;
    7752              :            '--> S5: prod_T' = a_it w* CONST;
    7753              : 
    7754              :    there may be other users of a_T outside the pattern.  In that case S2 will
    7755              :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7756              :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7757              :    be recorded in S3.  */
    7758              : 
    7759              : void
    7760       985415 : vect_pattern_recog (vec_info *vinfo)
    7761              : {
    7762       985415 :   basic_block *bbs = vinfo->bbs;
    7763       985415 :   unsigned int nbbs = vinfo->nbbs;
    7764              : 
    7765       985415 :   vect_determine_precisions (vinfo);
    7766              : 
    7767       985415 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7768              : 
    7769              :   /* Scan through the stmts in the region, applying the pattern recognition
    7770              :      functions starting at each stmt visited.  */
    7771     12415923 :   for (unsigned i = 0; i < nbbs; i++)
    7772              :     {
    7773     11430508 :       basic_block bb = bbs[i];
    7774              : 
    7775    117282901 :       for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7776              :         {
    7777     94421885 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7778              : 
    7779     94421885 :           if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
    7780     64351231 :             continue;
    7781              : 
    7782              :           /* Scan over all generic vect_recog_xxx_pattern functions.  */
    7783    992331582 :           for (const auto &func_ptr : vect_vect_recog_func_ptrs)
    7784    962260928 :             vect_pattern_recog_1 (vinfo, func_ptr,
    7785              :                                   stmt_info);
    7786              :         }
    7787              :     }
    7788              : 
    7789              :   /* After this no more add_stmt calls are allowed.  */
    7790       985415 :   vinfo->stmt_vec_info_ro = true;
    7791       985415 : }
    7792              : 
    7793              : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
    7794              :    or internal_fn contained in ch, respectively.  */
    7795              : gimple *
    7796       152607 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7797              : {
    7798       152607 :   gcc_assert (op0 != NULL_TREE);
    7799       152607 :   if (ch.is_tree_code ())
    7800       152607 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7801              : 
    7802            0 :   gcc_assert (ch.is_internal_fn ());
    7803            0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7804              :                                              op1 == NULL_TREE ? 1 : 2,
    7805              :                                              op0, op1);
    7806            0 :   gimple_call_set_lhs (stmt, lhs);
    7807            0 :   return stmt;
    7808              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.