LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions)
Test: gcc.info
Test Date: 2025-06-21 16:26:05
Legend: Lines: hit, not hit | Branches: + taken, - not taken, # not executed
Coverage summary (Coverage / Total / Hit):
  Lines:     88.2 % / 2835 / 2501
  Functions: 92.9 % /   84 /   78
  Branches:       - /    0 /    0

             Branch data     Line data    Source code
       1                 :             : /* Analysis Utilities for Loop Vectorization.
       2                 :             :    Copyright (C) 2006-2025 Free Software Foundation, Inc.
       3                 :             :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.  */
      20                 :             : 
      21                 :             : #include "config.h"
      22                 :             : #include "system.h"
      23                 :             : #include "coretypes.h"
      24                 :             : #include "backend.h"
      25                 :             : #include "rtl.h"
      26                 :             : #include "tree.h"
      27                 :             : #include "gimple.h"
      28                 :             : #include "gimple-iterator.h"
      29                 :             : #include "gimple-fold.h"
      30                 :             : #include "ssa.h"
      31                 :             : #include "expmed.h"
      32                 :             : #include "optabs-tree.h"
      33                 :             : #include "insn-config.h"
      34                 :             : #include "recog.h"            /* FIXME: for insn_data */
      35                 :             : #include "fold-const.h"
      36                 :             : #include "stor-layout.h"
      37                 :             : #include "tree-eh.h"
      38                 :             : #include "gimplify.h"
      39                 :             : #include "gimple-iterator.h"
      40                 :             : #include "gimple-fold.h"
      41                 :             : #include "gimplify-me.h"
      42                 :             : #include "cfgloop.h"
      43                 :             : #include "tree-vectorizer.h"
      44                 :             : #include "dumpfile.h"
      45                 :             : #include "builtins.h"
      46                 :             : #include "internal-fn.h"
      47                 :             : #include "case-cfn-macros.h"
      48                 :             : #include "fold-const-call.h"
      49                 :             : #include "attribs.h"
      50                 :             : #include "cgraph.h"
      51                 :             : #include "omp-simd-clone.h"
      52                 :             : #include "predict.h"
      53                 :             : #include "tree-vector-builder.h"
      54                 :             : #include "tree-ssa-loop-ivopts.h"
      55                 :             : #include "vec-perm-indices.h"
      56                 :             : #include "gimple-range.h"
      57                 :             : #include "alias.h"
      58                 :             : 
      59                 :             : 
      60                 :             : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      61                 :             :    in the first operand.  Disentangling this is future work, the
      62                 :             :    IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
      63                 :             : 
      64                 :             : 
      65                 :             : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      66                 :             :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      67                 :             : 
      68                 :             : bool
      69                 :    11451512 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      70                 :             : {
      71                 :    11451512 :   int_range_max vr;
      72                 :    11451512 :   tree vr_min, vr_max;
      73                 :    22903024 :   get_range_query (cfun)->range_of_expr (vr, var);
      74                 :    11451512 :   if (vr.undefined_p ())
      75                 :          67 :     vr.set_varying (TREE_TYPE (var));
      76                 :    11451512 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      77                 :    11451512 :   *min_value = wi::to_wide (vr_min);
      78                 :    11451512 :   *max_value = wi::to_wide (vr_max);
      79                 :    11451512 :   wide_int nonzero = get_nonzero_bits (var);
      80                 :    11451512 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      81                 :    11451512 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      82                 :             :                                          nonzero, sgn) == VR_RANGE)
      83                 :             :     {
      84                 :     5438268 :       if (dump_enabled_p ())
      85                 :             :         {
      86                 :       72686 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      87                 :       72686 :           dump_printf (MSG_NOTE, " has range [");
      88                 :       72686 :           dump_hex (MSG_NOTE, *min_value);
      89                 :       72686 :           dump_printf (MSG_NOTE, ", ");
      90                 :       72686 :           dump_hex (MSG_NOTE, *max_value);
      91                 :       72686 :           dump_printf (MSG_NOTE, "]\n");
      92                 :             :         }
      93                 :     5438268 :       return true;
      94                 :             :     }
      95                 :             :   else
      96                 :             :     {
      97                 :     6013244 :       if (dump_enabled_p ())
      98                 :             :         {
      99                 :       85187 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     100                 :       85187 :           dump_printf (MSG_NOTE, " has no range info\n");
     101                 :             :         }
     102                 :     6013244 :       return false;
     103                 :             :     }
     104                 :    11451512 : }
     105                 :             : 
     106                 :             : /* Report that we've found an instance of the pattern called NAME in
     107                 :             :    statement STMT.  */
     108                 :             : 
     109                 :             : static void
     110                 :     1057984 : vect_pattern_detected (const char *name, gimple *stmt)
     111                 :             : {
     112                 :     1057984 :   if (dump_enabled_p ())
     113                 :       25155 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     114                 :     1057984 : }
     115                 :             : 
     116                 :             : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     117                 :             :    return the pattern statement's stmt_vec_info.  Set its vector type to
     118                 :             :    VECTYPE if it doesn't have one already.  */
     119                 :             : 
     120                 :             : static stmt_vec_info
     121                 :     2004514 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     122                 :             :                         stmt_vec_info orig_stmt_info, tree vectype)
     123                 :             : {
     124                 :     2004514 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     125                 :     2004514 :   if (pattern_stmt_info == NULL)
     126                 :     1202400 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     127                 :     2004514 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     128                 :             : 
     129                 :     2004514 :   pattern_stmt_info->pattern_stmt_p = true;
     130                 :     2004514 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     131                 :     2004514 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     132                 :     2004514 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     133                 :     2004514 :   STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
     134                 :     2004514 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     135                 :             :     {
     136                 :     2134797 :       gcc_assert (!vectype
     137                 :             :                   || is_a <gcond *> (pattern_stmt)
     138                 :             :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     139                 :             :                       == vect_use_mask_type_p (orig_stmt_info)));
     140                 :     1210737 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     141                 :     1210737 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
     142                 :             :     }
     143                 :     2004514 :   return pattern_stmt_info;
     144                 :             : }
     145                 :             : 
     146                 :             : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     147                 :             :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     148                 :             :    have one already.  */
     149                 :             : 
     150                 :             : static void
     151                 :      844056 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     152                 :             :                        stmt_vec_info orig_stmt_info, tree vectype)
     153                 :             : {
     154                 :      844056 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     155                 :      844056 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     156                 :           0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     157                 :      814049 : }
     158                 :             : 
     159                 :             : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     160                 :             :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     161                 :             :    be different from the vector type of the final pattern statement.
     162                 :             :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     163                 :             :    from which it was derived.  */
     164                 :             : 
     165                 :             : static inline void
     166                 :     1117030 : append_pattern_def_seq (vec_info *vinfo,
     167                 :             :                         stmt_vec_info stmt_info, gimple *new_stmt,
     168                 :             :                         tree vectype = NULL_TREE,
     169                 :             :                         tree scalar_type_for_mask = NULL_TREE)
     170                 :             : {
     171                 :     1816932 :   gcc_assert (!scalar_type_for_mask
     172                 :             :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
     173                 :     1117030 :   if (vectype)
     174                 :             :     {
     175                 :      793777 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     176                 :      793777 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     177                 :      793777 :       if (scalar_type_for_mask)
     178                 :      417128 :         new_stmt_info->mask_precision
     179                 :      834256 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     180                 :             :     }
     181                 :     1117030 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     182                 :             :                                       new_stmt);
     183                 :     1117030 : }
     184                 :             : 
     185                 :             : 
     186                 :             : /* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
     187                 :             :    statements are not vectorized but are materialized as scalar in the loop
     188                 :             :    preheader.  */
     189                 :             : 
     190                 :             : static inline void
     191                 :        1229 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
     192                 :             : {
     193                 :        1229 :   gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
     194                 :             : }
     195                 :             : 
     196                 :             : /* The caller wants to perform new operations on vect_external variable
     197                 :             :    VAR, so that the result of the operations would also be vect_external.
     198                 :             :    Return the edge on which the operations can be performed, if one exists.
     199                 :             :    Return null if the operations should instead be treated as part of
     200                 :             :    the pattern that needs them.  */
     201                 :             : 
     202                 :             : static edge
     203                 :        8506 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     204                 :             : {
     205                 :        8506 :   edge e = NULL;
     206                 :        8506 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     207                 :             :     {
     208                 :         525 :       e = loop_preheader_edge (loop_vinfo->loop);
     209                 :         525 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))
     210                 :             :         {
     211                 :         426 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     212                 :         426 :           if (bb == NULL
     213                 :         426 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     214                 :             :             e = NULL;
     215                 :             :         }
     216                 :             :     }
     217                 :        8506 :   return e;
     218                 :             : }
     219                 :             : 
     220                 :             : /* Return true if the target supports a vector version of CODE,
     221                 :             :    where CODE is known to map to a direct optab with the given SUBTYPE.
     222                 :             :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     223                 :             :    specifies the type of the scalar result.
     224                 :             : 
     225                 :             :    If CODE allows the inputs and outputs to have different type
     226                 :             :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     227                 :             :    than the output mode that determines the appropriate target pattern.
     228                 :             :    Operand 0 of the target pattern then specifies the mode that the output
     229                 :             :    must have.
     230                 :             : 
     231                 :             :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     232                 :             :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     233                 :             :    is nonnull.  */
     234                 :             : 
     235                 :             : static bool
     236                 :        2137 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     237                 :             :                                  tree itype, tree *vecotype_out,
     238                 :             :                                  tree *vecitype_out = NULL,
     239                 :             :                                  enum optab_subtype subtype = optab_default)
     240                 :             : {
     241                 :        2137 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     242                 :        2137 :   if (!vecitype)
     243                 :             :     return false;
     244                 :             : 
     245                 :        2137 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     246                 :        2137 :   if (!vecotype)
     247                 :             :     return false;
     248                 :             : 
     249                 :        2030 :   optab optab = optab_for_tree_code (code, vecitype, subtype);
     250                 :        2030 :   if (!optab)
     251                 :             :     return false;
     252                 :             : 
     253                 :        2030 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
     254                 :        2030 :   if (icode == CODE_FOR_nothing
     255                 :        2030 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
     256                 :        1804 :     return false;
     257                 :             : 
     258                 :         226 :   *vecotype_out = vecotype;
     259                 :         226 :   if (vecitype_out)
     260                 :         226 :     *vecitype_out = vecitype;
     261                 :             :   return true;
     262                 :             : }
     263                 :             : 
     264                 :             : /* Return true if the target supports a vector version of CODE,
     265                 :             :    where CODE is known to map to a conversion optab with the given SUBTYPE.
     266                 :             :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     267                 :             :    specifies the type of the scalar result.
     268                 :             : 
     269                 :             :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     270                 :             :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     271                 :             :    is nonnull.  */
     272                 :             : 
     273                 :             : static bool
     274                 :         966 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
     275                 :             :                                  tree itype, tree *vecotype_out,
     276                 :             :                                  tree *vecitype_out = NULL,
     277                 :             :                                  enum optab_subtype subtype = optab_default)
     278                 :             : {
     279                 :         966 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     280                 :         966 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     281                 :         966 :   if (!vecitype || !vecotype)
     282                 :             :     return false;
     283                 :             : 
     284                 :         849 :   if (!directly_supported_p (code, vecotype, vecitype, subtype))
     285                 :             :     return false;
     286                 :             : 
     287                 :         423 :   *vecotype_out = vecotype;
     288                 :         423 :   if (vecitype_out)
     289                 :         423 :     *vecitype_out = vecitype;
     290                 :             :   return true;
     291                 :             : }
     292                 :             : 
     293                 :             : /* Round bit precision PRECISION up to a full element.  */
     294                 :             : 
     295                 :             : static unsigned int
     296                 :     2794893 : vect_element_precision (unsigned int precision)
     297                 :             : {
     298                 :           0 :   precision = 1 << ceil_log2 (precision);
     299                 :     4079744 :   return MAX (precision, BITS_PER_UNIT);
     300                 :             : }
     301                 :             : 
     302                 :             : /* If OP is defined by a statement that's being considered for vectorization,
     303                 :             :    return information about that statement, otherwise return NULL.  */
     304                 :             : 
     305                 :             : static stmt_vec_info
     306                 :     1552943 : vect_get_internal_def (vec_info *vinfo, tree op)
     307                 :             : {
     308                 :     1552943 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     309                 :     1552943 :   if (def_stmt_info
     310                 :     1494694 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     311                 :     1484424 :     return vect_stmt_to_vectorize (def_stmt_info);
     312                 :             :   return NULL;
     313                 :             : }
     314                 :             : 
     315                 :             : /* Holds information about an input operand after some sign changes
     316                 :             :    and type promotions have been peeled away.  */
     317                 :             : class vect_unpromoted_value {
     318                 :             : public:
     319                 :             :   vect_unpromoted_value ();
     320                 :             : 
     321                 :             :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
     322                 :             : 
     323                 :             :   /* The value obtained after peeling away zero or more casts.  */
     324                 :             :   tree op;
     325                 :             : 
     326                 :             :   /* The type of OP.  */
     327                 :             :   tree type;
     328                 :             : 
     329                 :             :   /* The definition type of OP.  */
     330                 :             :   vect_def_type dt;
     331                 :             : 
     332                 :             :   /* If OP is the result of peeling at least one cast, and if the cast
     333                 :             :      of OP itself is a vectorizable statement, CASTER identifies that
     334                 :             :      statement, otherwise it is null.  */
     335                 :             :   stmt_vec_info caster;
     336                 :             : };
     337                 :             : 
     338                 :   283257979 : inline vect_unpromoted_value::vect_unpromoted_value ()
     339                 :   283257979 :   : op (NULL_TREE),
     340                 :   283257979 :     type (NULL_TREE),
     341                 :   283257979 :     dt (vect_uninitialized_def),
     342                 :     2559239 :     caster (NULL)
     343                 :             : {
     344                 :             : }
     345                 :             : 
     346                 :             : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     347                 :             :    statement that casts it to CASTER_IN.  */
     348                 :             : 
     349                 :             : inline void
     350                 :    10157635 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     351                 :             :                                stmt_vec_info caster_in)
     352                 :             : {
     353                 :    10157635 :   op = op_in;
     354                 :    10157635 :   type = TREE_TYPE (op);
     355                 :    10157635 :   dt = dt_in;
     356                 :    10157635 :   caster = caster_in;
     357                 :    10157635 : }
     358                 :             : 
     359                 :             : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     360                 :             :    to reach some vectorizable inner operand OP', continuing as long as it
     361                 :             :    is possible to convert OP' back to OP using a possible sign change
     362                 :             :    followed by a possible promotion P.  Return this OP', or null if OP is
     363                 :             :    not a vectorizable SSA name.  If there is a promotion P, describe its
     364                 :             :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     365                 :             :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     366                 :             :    have more than one user.
     367                 :             : 
     368                 :             :    A successful return means that it is possible to go from OP' to OP
     369                 :             :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     370                 :             :    whereas the cast from UNPROM to OP might be a promotion, a sign
     371                 :             :    change, or a nop.
     372                 :             : 
     373                 :             :    E.g. say we have:
     374                 :             : 
     375                 :             :        signed short *ptr = ...;
     376                 :             :        signed short C = *ptr;
     377                 :             :        unsigned short B = (unsigned short) C;    // sign change
     378                 :             :        signed int A = (signed int) B;            // unsigned promotion
     379                 :             :        ...possible other uses of A...
     380                 :             :        unsigned int OP = (unsigned int) A;       // sign change
     381                 :             : 
     382                 :             :    In this case it's possible to go directly from C to OP using:
     383                 :             : 
     384                 :             :        OP = (unsigned int) (unsigned short) C;
     385                 :             :             +------------+ +--------------+
     386                 :             :                promotion      sign change
     387                 :             : 
     388                 :             :    so OP' would be C.  The input to the promotion is B, so UNPROM
     389                 :             :    would describe B.  */
     390                 :             : 
     391                 :             : static tree
     392                 :     7583548 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     393                 :             :                                       vect_unpromoted_value *unprom,
     394                 :             :                                       bool *single_use_p = NULL)
     395                 :             : {
     396                 :     7583548 :   tree op_type = TREE_TYPE (op);
     397                 :     7583548 :   if (!INTEGRAL_TYPE_P (op_type))
     398                 :             :     return NULL_TREE;
     399                 :             : 
     400                 :     7559162 :   tree res = NULL_TREE;
     401                 :     7559162 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     402                 :     7559162 :   unsigned int min_precision = orig_precision;
     403                 :     7559162 :   stmt_vec_info caster = NULL;
     404                 :     9115304 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     405                 :             :     {
     406                 :             :       /* See whether OP is simple enough to vectorize.  */
     407                 :     8913211 :       stmt_vec_info def_stmt_info;
     408                 :     8913211 :       gimple *def_stmt;
     409                 :     8913211 :       vect_def_type dt;
     410                 :     8913211 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     411                 :             :         break;
     412                 :             : 
     413                 :             :       /* If OP is the input of a demotion, skip over it to see whether
     414                 :             :          OP is itself the result of a promotion.  If so, the combined
     415                 :             :          effect of the promotion and the demotion might fit the required
     416                 :             :          pattern, otherwise neither operation fits.
     417                 :             : 
     418                 :             :          This copes with cases such as the result of an arithmetic
     419                 :             :          operation being truncated before being stored, and where that
     420                 :             :          arithmetic operation has been recognized as an over-widened one.  */
     421                 :     8908243 :       if (TYPE_PRECISION (op_type) <= min_precision)
     422                 :             :         {
     423                 :             :           /* Use OP as the UNPROM described above if we haven't yet
     424                 :             :              found a promotion, or if using the new input preserves the
     425                 :             :              sign of the previous promotion.  */
     426                 :     8764035 :           if (!res
     427                 :     1312607 :               || TYPE_PRECISION (unprom->type) == orig_precision
     428                 :       40047 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
     429                 :     8801597 :               || (TYPE_UNSIGNED (op_type)
     430                 :       26158 :                   && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
     431                 :             :             {
     432                 :     8727592 :               unprom->set_op (op, dt, caster);
     433                 :     8727592 :               min_precision = TYPE_PRECISION (op_type);
     434                 :             :             }
     435                 :             :           /* Stop if we've already seen a promotion and if this
     436                 :             :              conversion does more than change the sign.  */
     437                 :       36443 :           else if (TYPE_PRECISION (op_type)
     438                 :       36443 :                    != TYPE_PRECISION (unprom->type))
     439                 :             :             break;
     440                 :             : 
     441                 :             :           /* The sequence now extends to OP.  */
     442                 :             :           res = op;
     443                 :             :         }
     444                 :             : 
     445                 :             :       /* See whether OP is defined by a cast.  Record it as CASTER if
     446                 :             :          the cast is potentially vectorizable.  */
     447                 :     8908202 :       if (!def_stmt)
     448                 :             :         break;
     449                 :     8714321 :       caster = def_stmt_info;
     450                 :             : 
     451                 :             :       /* Ignore pattern statements, since we don't link uses for them.  */
     452                 :     8714321 :       if (caster
     453                 :     8714321 :           && single_use_p
     454                 :     1623131 :           && !STMT_VINFO_RELATED_STMT (caster)
     455                 :    10174357 :           && !has_single_use (res))
     456                 :      962629 :         *single_use_p = false;
     457                 :             : 
     458                 :    16071390 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     459                 :     5686695 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     460                 :             :         break;
     461                 :             : 
     462                 :             :       /* Continue with the input to the cast.  */
     463                 :     1556142 :       op = gimple_assign_rhs1 (def_stmt);
     464                 :     1556142 :       op_type = TREE_TYPE (op);
     465                 :             :     }
     466                 :             :   return res;
     467                 :             : }
     468                 :             : 
     469                 :             : /* OP is an INTEGER_CST operand to an operation that returns TYPE, and
     470                 :             :    we want to treat the operation as a widening one.  So far we can
     471                 :             :    treat it as widening from *COMMON_TYPE.
     472                 :             : 
     473                 :             :    Return true if OP is suitable for such a widening operation,
     474                 :             :    either widening from *COMMON_TYPE or from some supertype of it.
     475                 :             :    Update *COMMON_TYPE to the supertype in the latter case.
     476                 :             : 
     477                 :             :    SHIFT_P is true if OP is a shift amount.  */
     478                 :             : 
     479                 :             : static bool
     480                 :      284992 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     481                 :             :                             tree *common_type)
     482                 :             : {
     483                 :             :   /* Calculate the minimum precision required by OP, without changing
     484                 :             :      the sign of either operand.  Fail if it exceeds half of TYPE's.  */
     485                 :      284992 :   unsigned int precision;
     486                 :      284992 :   if (shift_p)
     487                 :             :     {
     488                 :       13328 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     489                 :             :         return false;
     490                 :       10789 :       precision = TREE_INT_CST_LOW (op);
     491                 :             :     }
     492                 :             :   else
     493                 :             :     {
     494                 :      271664 :       precision = wi::min_precision (wi::to_widest (op),
     495                 :      271664 :                                      TYPE_SIGN (*common_type));
     496                 :      271664 :       if (precision * 2 > TYPE_PRECISION (type))
     497                 :             :         return false;
     498                 :             :     }
     499                 :             : 
     500                 :             :   /* If OP requires a wider type, switch to that type.  The checks
     501                 :             :      above ensure that this is still narrower than the result.  */
     502                 :      267859 :   precision = vect_element_precision (precision);
     503                 :      267859 :   if (TYPE_PRECISION (*common_type) < precision)
     504                 :        7423 :     *common_type = build_nonstandard_integer_type
     505                 :        7423 :       (precision, TYPE_UNSIGNED (*common_type));
     506                 :             :   return true;
     507                 :             : }
     508                 :             : 
     509                 :             : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     510                 :             :    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
     511                 :             : 
     512                 :             : static bool
     513                 :       40543 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     514                 :             : {
     515                 :       40543 :   if (types_compatible_p (*common_type, new_type))
     516                 :             :     return true;
     517                 :             : 
     518                 :             :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     519                 :        7117 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     520                 :        7117 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     521                 :             :     return true;
     522                 :             : 
     523                 :             :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     524                 :        6593 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     525                 :        6593 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     526                 :             :     {
     527                 :         363 :       *common_type = new_type;
     528                 :         363 :       return true;
     529                 :             :     }
     530                 :             : 
     531                 :             :   /* We have mismatched signs, with the signed type being no wider
     532                 :             :      than the unsigned type.  In this case we need a signed type
     533                 :             :      twice as wide as the wider of the two.  */
     534                 :        6230 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     535                 :             :                                 TYPE_PRECISION (new_type));
     536                 :        6230 :   precision *= 2;
     537                 :             : 
     538                 :        6230 :   if (precision * 2 > TYPE_PRECISION (type))
     539                 :             :     return false;
     540                 :             : 
     541                 :          34 :   *common_type = build_nonstandard_integer_type (precision, false);
     542                 :          34 :   return true;
     543                 :             : }
     544                 :             : 
     545                 :             : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     546                 :             :    in which each node either performs CODE or WIDENED_CODE, and where
     547                 :             :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     548                 :             :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     549                 :             :    CODE and WIDENED_CODE are some sort of shift.
     550                 :             : 
     551                 :             :    If STMT_INFO is such a tree, return the number of leaf operands
     552                 :             :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     553                 :             :    to a type that (a) is narrower than the result of STMT_INFO and
     554                 :             :    (b) can hold all leaf operand values.
     555                 :             : 
     556                 :             :    If SUBTYPE is nonnull, also accept operands of different signs for
     557                 :             :    which vect_joust_widened_type finds no common type.  In that case
     558                 :             :    *SUBTYPE is set to optab_vector_mixed_sign.
     559                 :             : 
     560                 :             :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     561                 :             :    exists.  */
     562                 :             : 
     563                 :             : static unsigned int
     564                 :   119671360 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     565                 :             :                       code_helper widened_code, bool shift_p,
     566                 :             :                       unsigned int max_nops,
     567                 :             :                       vect_unpromoted_value *unprom, tree *common_type,
     568                 :             :                       enum optab_subtype *subtype = NULL)
     569                 :             : {
     570                 :             :   /* Check for an integer operation with the right code.  */
     571                 :   119671360 :   gimple* stmt = stmt_info->stmt;
     572                 :   119671360 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     573                 :             :     return 0;
     574                 :             : 
     575                 :    96361093 :   code_helper rhs_code;
     576                 :    96361093 :   if (is_gimple_assign (stmt))
     577                 :    82488479 :     rhs_code = gimple_assign_rhs_code (stmt);
     578                 :    13872614 :   else if (is_gimple_call (stmt))
     579                 :    13872614 :     rhs_code = gimple_call_combined_fn (stmt);
     580                 :             :   else
     581                 :             :     return 0;
     582                 :             : 
     583                 :    96361093 :   if (rhs_code != code
     584                 :    96361093 :       && rhs_code != widened_code)
     585                 :             :     return 0;
     586                 :             : 
     587                 :     5870441 :   tree lhs = gimple_get_lhs (stmt);
     588                 :     5870441 :   tree type = TREE_TYPE (lhs);
     589                 :     5870441 :   if (!INTEGRAL_TYPE_P (type))
     590                 :             :     return 0;
     591                 :             : 
     592                 :             :   /* Assume that both operands will be leaf operands.  */
     593                 :     5321085 :   max_nops -= 2;
     594                 :             : 
     595                 :             :   /* Check the operands.  */
     596                 :     5321085 :   unsigned int next_op = 0;
     597                 :     6024170 :   for (unsigned int i = 0; i < 2; ++i)
     598                 :             :     {
     599                 :     5721598 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     600                 :     5721598 :       unsigned int nops = 1;
     601                 :     5721598 :       tree op = gimple_arg (stmt, i);
     602                 :     5721598 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     603                 :             :         {
     604                 :             :           /* We already have a common type from earlier operands.
     605                 :             :              Update it to account for OP.  */
     606                 :      284992 :           this_unprom->set_op (op, vect_constant_def);
     607                 :      284992 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     608                 :             :             return 0;
     609                 :             :         }
     610                 :             :       else
     611                 :             :         {
     612                 :             :           /* Only allow shifts by constants.  */
     613                 :     5436606 :           if (shift_p && i == 1)
     614                 :             :             return 0;
     615                 :             : 
     616                 :     5431811 :           if (rhs_code != code)
     617                 :             :             {
     618                 :             :               /* If rhs_code is widened_code, don't look through further
     619                 :             :                  possible promotions, since there is a promotion already
     620                 :             :                  embedded in the widening operation.  */
     621                 :        3109 :               if (TREE_CODE (op) != SSA_NAME
     622                 :        3109 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     623                 :           0 :                 return 0;
     624                 :             : 
     625                 :        3109 :               stmt_vec_info def_stmt_info;
     626                 :        3109 :               gimple *def_stmt;
     627                 :        3109 :               vect_def_type dt;
     628                 :        3109 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     629                 :             :                                        &def_stmt))
     630                 :             :                 return 0;
     631                 :        3109 :               this_unprom->set_op (op, dt, NULL);
     632                 :             :             }
     633                 :     5428702 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     634                 :             :                                                           this_unprom))
     635                 :             :             return 0;
     636                 :             : 
     637                 :     5326020 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     638                 :             :             {
     639                 :             :               /* The operand isn't widened.  If STMT_INFO has the code
     640                 :             :                  for an unwidened operation, recursively check whether
     641                 :             :                  this operand is a node of the tree.  */
     642                 :     4880699 :               if (rhs_code != code
     643                 :     4880699 :                   || max_nops == 0
     644                 :     4881143 :                   || this_unprom->dt != vect_internal_def)
     645                 :             :                 return 0;
     646                 :             : 
     647                 :             :               /* Give back the leaf slot allocated above now that we're
     648                 :             :                  not treating this as a leaf operand.  */
     649                 :         444 :               max_nops += 1;
     650                 :             : 
     651                 :             :               /* Recursively process the definition of the operand.  */
     652                 :         444 :               stmt_vec_info def_stmt_info
     653                 :         444 :                 = vect_get_internal_def (vinfo, this_unprom->op);
     654                 :             : 
     655                 :         444 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     656                 :             :                                            widened_code, shift_p, max_nops,
     657                 :             :                                            this_unprom, common_type,
     658                 :             :                                            subtype);
     659                 :         444 :               if (nops == 0)
     660                 :             :                 return 0;
     661                 :             : 
     662                 :         299 :               max_nops -= nops;
     663                 :             :             }
     664                 :             :           else
     665                 :             :             {
     666                 :             :               /* Make sure that the operand is narrower than the result.  */
     667                 :      445321 :               if (TYPE_PRECISION (this_unprom->type) * 2
     668                 :      445321 :                   > TYPE_PRECISION (type))
     669                 :             :                 return 0;
     670                 :             : 
     671                 :             :               /* Update COMMON_TYPE for the new operand.  */
     672                 :      440913 :               if (i == 0)
     673                 :      400370 :                 *common_type = this_unprom->type;
     674                 :       40543 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     675                 :             :                                                  common_type))
     676                 :             :                 {
     677                 :        6196 :                   if (subtype)
     678                 :             :                     {
     679                 :             :                       /* See if we can sign extend the smaller type.  */
     680                 :         210 :                       if (TYPE_PRECISION (this_unprom->type)
     681                 :         210 :                           > TYPE_PRECISION (*common_type))
     682                 :          36 :                         *common_type = this_unprom->type;
     683                 :         210 :                       *subtype = optab_vector_mixed_sign;
     684                 :             :                     }
     685                 :             :                   else
     686                 :             :                     return 0;
     687                 :             :                 }
     688                 :             :             }
     689                 :             :         }
     690                 :      703085 :       next_op += nops;
     691                 :             :     }
     692                 :             :   return next_op;
     693                 :             : }
     694                 :             : 
     695                 :             : /* Helper to return a new pattern temporary of type TYPE defined by STMT.
     696                 :             :    If STMT is NULL, the caller must set SSA_NAME_DEF_STMT on the result.  */
     697                 :             : 
     698                 :             : static tree
     699                 :     1737439 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     700                 :             : {
     701                 :           0 :   return make_temp_ssa_name (type, stmt, "patt");
     702                 :             : }
     703                 :             : 
     704                 :             : /* STMT2_INFO describes a type conversion that could be split into STMT1
     705                 :             :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     706                 :             :    input.  Try to do this using pattern statements, returning true on
     707                 :             :    success.  VECTYPE is the vector type to record for STMT1.  */
     708                 :             : 
     709                 :             : static bool
     710                 :       31021 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     711                 :             :                       gimple *stmt1, tree vectype)
     712                 :             : {
     713                 :       31021 :   if (is_pattern_stmt_p (stmt2_info))
     714                 :             :     {
     715                 :             :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     716                 :             :          the pattern is attached.  */
     717                 :        1014 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     718                 :        1014 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     719                 :             : 
     720                 :        1014 :       if (dump_enabled_p ())
     721                 :          22 :         dump_printf_loc (MSG_NOTE, vect_location,
     722                 :             :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     723                 :             : 
     724                 :             :       /* Since STMT2_INFO is a pattern statement, we can change it
     725                 :             :          in-situ without worrying about changing the code for the
     726                 :             :          containing block.  */
     727                 :        1014 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     728                 :             : 
     729                 :        1014 :       if (dump_enabled_p ())
     730                 :             :         {
     731                 :          22 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     732                 :          22 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     733                 :             :                            stmt2_info->stmt);
     734                 :             :         }
     735                 :             : 
     736                 :        1014 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     737                 :        1014 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     738                 :             :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     739                 :             :            to the end of the definition sequence.  */
     740                 :        1011 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     741                 :             :       else
     742                 :             :         {
     743                 :             :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     744                 :             :              before it.  */
     745                 :           3 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     746                 :           3 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     747                 :             :         }
     748                 :        1014 :       return true;
     749                 :             :     }
     750                 :             :   else
     751                 :             :     {
     752                 :             :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a two-statement
     753                 :             :          pattern now, giving up if its result type has no vector type.  */
     754                 :       30007 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     755                 :       30007 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     756                 :       30007 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     757                 :       30007 :       if (!lhs_vectype)
     758                 :             :         return false;
     759                 :             : 
     760                 :       30007 :       if (dump_enabled_p ())
     761                 :        2152 :         dump_printf_loc (MSG_NOTE, vect_location,
     762                 :             :                          "Splitting statement: %G", stmt2_info->stmt);
     763                 :             : 
     764                 :             :       /* Add STMT1 as a singleton pattern definition sequence.  */
     765                 :       30007 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     766                 :       30007 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     767                 :       30007 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     768                 :             : 
     769                 :             :       /* Build the second of the two pattern statements.  */
     770                 :       30007 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     771                 :       30007 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     772                 :       30007 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     773                 :             : 
     774                 :       30007 :       if (dump_enabled_p ())
     775                 :             :         {
     776                 :        2152 :           dump_printf_loc (MSG_NOTE, vect_location,
     777                 :             :                            "into pattern statements: %G", stmt1);
     778                 :        2152 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     779                 :             :                            (gimple *) new_stmt2);
     780                 :             :         }
     781                 :             : 
     782                 :       30007 :       return true;
     783                 :             :     }
     784                 :             : }
     785                 :             : 
     786                 :             : /* Look for the following pattern
     787                 :             :         X = x[i]
     788                 :             :         Y = y[i]
     789                 :             :         DIFF = X - Y
     790                 :             :         DAD = ABS_EXPR<DIFF>
     791                 :             : 
     792                 :             :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     793                 :             :    Return true, with HALF_TYPE and UNPROM set, if the ABS input is
     794                 :             :    found to be a widened subtraction.  If DIFF_STMT is nonnull, it is
     795                 :             :    set to the statement defining the ABS input, ignoring conversions,
     796                 :             :    when that is a MINUS_EXPR with undefined overflow, widened or not.
     797                 :             :  */
     798                 :             : static bool
     799                 :    20314119 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     800                 :             :                                 tree *half_type,
     801                 :             :                                 vect_unpromoted_value unprom[2],
     802                 :             :                                 gassign **diff_stmt)
     803                 :             : {
     804                 :    20314119 :   if (!abs_stmt)
     805                 :             :     return false;
     806                 :             : 
     807                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     808                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     809                 :    20314119 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     810                 :    20314119 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     811                 :             :     return false;
     812                 :             : 
     813                 :       24200 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     814                 :       24200 :   tree abs_type = TREE_TYPE (abs_oprnd);
     815                 :       24200 :   if (!abs_oprnd)  /* NOTE(review): tested only after TREE_TYPE use.  */
     816                 :             :     return false;
     817                 :       17413 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     818                 :        7020 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     819                 :       31093 :       || TYPE_UNSIGNED (abs_type))
     820                 :             :     return false;
     821                 :             : 
     822                 :             :   /* Peel off conversions from the ABS input.  This can involve sign
     823                 :             :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     824                 :             :      or signed promotion, but it can't include unsigned promotion.
     825                 :             :      (Note that ABS of an unsigned promotion should have been folded
     826                 :             :      away before now anyway.)  */
     827                 :        6893 :   vect_unpromoted_value unprom_diff;
     828                 :        6893 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     829                 :             :                                                     &unprom_diff);
     830                 :        6893 :   if (!abs_oprnd)
     831                 :             :     return false;
     832                 :        6660 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     833                 :        6660 :       && TYPE_UNSIGNED (unprom_diff.type))
     834                 :             :     return false;
     835                 :             : 
     836                 :             :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     837                 :        6660 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     838                 :        6660 :   if (!diff_stmt_vinfo)
     839                 :             :     return false;
     840                 :             : 
     841                 :        6166 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     842                 :        6166 :   if (diff_stmt && diff
     843                 :        5254 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     844                 :        8462 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     845                 :        1291 :     *diff_stmt = diff;
     846                 :             : 
     847                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     848                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     849                 :        6166 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     850                 :        6166 :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     851                 :             :                             false, 2, unprom, half_type))
     852                 :             :     return true;
     853                 :             : 
     854                 :             :   return false;
     855                 :             : }
     856                 :             : 
     857                 :             : /* Convert UNPROM to TYPE and return the result, adding new statements
     858                 :             :    to STMT_INFO's pattern definition statements if no better way is
     859                 :             :    available.  VECTYPE is the vector form of TYPE.
     860                 :             :    If SUBTYPE is optab_vector_mixed_sign, TYPE must be signed and an unsigned
     861                 :             :    UNPROM->OP is converted to the unsigned forms of TYPE and VECTYPE.  */
     862                 :             : 
     863                 :             : static tree
     864                 :      471025 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     865                 :             :                     vect_unpromoted_value *unprom, tree vectype,
     866                 :             :                     enum optab_subtype subtype = optab_default)
     867                 :             : {
     868                 :             :   /* Mixed-sign case: TYPE is asserted signed; go unsigned if the operand is.  */
     869                 :      471025 :   if (subtype == optab_vector_mixed_sign)
     870                 :             :     {
     871                 :         164 :       gcc_assert (!TYPE_UNSIGNED (type));
     872                 :         164 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     873                 :             :         {
     874                 :          82 :           type = unsigned_type_for (type);
     875                 :          82 :           vectype = unsigned_type_for (vectype);
     876                 :             :         }
     877                 :             :     }
     878                 :             : 
     879                 :             :   /* Check for a no-op conversion: the operand's type is already compatible.  */
     880                 :      471025 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     881                 :      157215 :     return unprom->op;
     882                 :             : 
     883                 :             :   /* Allow the caller to create constant vect_unpromoted_values; these are rebuilt directly in TYPE.  */
     884                 :      313810 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     885                 :      188204 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     886                 :             : 
     887                 :      125606 :   tree input = unprom->op;  /* Source operand of any new cast built below.  */
     888                 :      125606 :   if (unprom->caster)
     889                 :             :     {
     890                 :       62620 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     891                 :       62620 :       tree lhs_type = TREE_TYPE (lhs);
     892                 :             : 
     893                 :             :       /* If the result of the existing cast is the right width, use it
     894                 :             :          instead of the source of the cast.  */
     895                 :       62620 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     896                 :             :         input = lhs;
     897                 :             :       /* If the precision we want is between the source and result
     898                 :             :          precisions of the existing cast, try splitting the cast into
     899                 :             :          two and tapping into a mid-way point.  */
     900                 :       60759 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     901                 :       60759 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     902                 :             :         {
     903                 :             :           /* In order to preserve the semantics of the original cast,
     904                 :             :              give the mid-way point the same signedness as the input value.
     905                 :             : 
     906                 :             :              It would be possible to use a signed type here instead if
     907                 :             :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     908                 :             :              make the sign of the midtype sensitive to the order in
     909                 :             :              which we process the statements, since the signedness of
     910                 :             :              TYPE is the signedness required by just one of possibly
     911                 :             :              many users.  Also, unsigned promotions are usually as cheap
     912                 :             :              as or cheaper than signed ones, so it's better to keep an
     913                 :             :              unsigned promotion.  */
     914                 :       31021 :           tree midtype = build_nonstandard_integer_type
     915                 :       31021 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     916                 :       31021 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     917                 :       31021 :           if (vec_midtype)
     918                 :             :             {
     919                 :       31021 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     920                 :       31021 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     921                 :             :                                                        unprom->op);
     922                 :       31021 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     923                 :             :                                          vec_midtype))
     924                 :           0 :                 append_pattern_def_seq (vinfo, stmt_info,
     925                 :             :                                         new_stmt, vec_midtype);  /* Not split: add NEW_STMT as a pattern def instead.  */
     926                 :             :             }
     927                 :             :         }
     928                 :             : 
     929                 :             :       /* See if we can reuse an existing result (the cast's lhs or the
     930                 :       62620 :       if (types_compatible_p (type, TREE_TYPE (input)))
     931                 :             :         return input;
     932                 :             :     }
     933                 :             : 
     934                 :             :   /* We need a new conversion statement.  */
     935                 :      102580 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     936                 :      102580 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     937                 :             : 
     938                 :             :   /* If OP is an external value, see if we can insert the new statement
     939                 :             :      on an incoming edge.  */
     940                 :      102580 :   if (input == unprom->op && unprom->dt == vect_external_def)
     941                 :        8493 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     942                 :             :       {
     943                 :         512 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     944                 :         512 :         gcc_assert (!new_bb);  /* The insertion must not hand back a new block.  */
     945                 :             :         return new_op;
     946                 :             :       }
     947                 :             : 
     948                 :             :   /* As a (common) last resort, add the statement to the pattern itself.  */
     949                 :      102068 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     950                 :      102068 :   return new_op;
     951                 :             : }
     952                 :             : 
     953                 :             : /* Invoke vect_convert_input for N elements of UNPROM and store the
     954                 :             :    result in the corresponding elements of RESULT.  An element whose op
     955                 :             :    equals that of an earlier element reuses the earlier element's result.
     956                 :             :    TYPE, VECTYPE and SUBTYPE are passed through to vect_convert_input.  */
     957                 :             : 
     958                 :             : static void
     959                 :      238878 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     960                 :             :                      tree *result, tree type, vect_unpromoted_value *unprom,
     961                 :             :                      tree vectype, enum optab_subtype subtype = optab_default)
     962                 :             : {
     963                 :      709675 :   for (unsigned int i = 0; i < n; ++i)
     964                 :             :     {
     965                 :             :       unsigned int j;
     966                 :      702438 :       for (j = 0; j < i; ++j)  /* Look for an earlier element with the same op.  */
     967                 :      231919 :         if (unprom[j].op == unprom[i].op)
     968                 :             :           break;
     969                 :             : 
     970                 :      470797 :       if (j < i)  /* Found one: share its already-converted result.  */
     971                 :         278 :         result[i] = result[j];
     972                 :             :       else
     973                 :      470519 :         result[i] = vect_convert_input (vinfo, stmt_info,
     974                 :      470519 :                                         type, &unprom[i], vectype, subtype);
     975                 :             :     }
     976                 :      238878 : }
     977                 :             : 
     978                 :             : /* The caller has created a (possibly empty) sequence of pattern definition
     979                 :             :    statements followed by a single statement PATTERN_STMT.  Cast the result
     980                 :             :    of this final statement to TYPE.  If a new statement is needed, add
     981                 :             :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     982                 :             :    and return the new statement, otherwise return PATTERN_STMT as-is.
     983                 :             :    VECITYPE is the vector form of PATTERN_STMT's result type.  */
     984                 :             : 
     985                 :             : static gimple *
     986                 :      263960 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     987                 :             :                      gimple *pattern_stmt, tree vecitype)
     988                 :             : {
     989                 :      263960 :   tree lhs = gimple_get_lhs (pattern_stmt);
     990                 :      263960 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))  /* A cast is only needed for incompatible types.  */
     991                 :             :     {
     992                 :      237616 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);  /* The old final stmt becomes a pattern def.  */
     993                 :      237616 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     994                 :      237616 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);  /* The new cast is returned, not appended here.  */
     995                 :             :     }
     996                 :      263960 :   return pattern_stmt;
     997                 :             : }
     998                 :             : 
     999                 :             : /* Return true if STMT_INFO describes a reduction for which reassociation
    1000                 :             :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1001                 :             :    a reduction chain and optimistically assume that all statements
    1002                 :             :    except the last allow reassociation.
    1003                 :             :    Also require it to have code CODE and to be a reduction
    1004                 :             :    in the outermost loop.  When returning true, store the operands in
    1005                 :             :    *OP0_OUT and *OP1_OUT.  */
    1006                 :             : 
    1007                 :             : static bool
    1008                 :    89328755 : vect_reassociating_reduction_p (vec_info *vinfo,
    1009                 :             :                                 stmt_vec_info stmt_info, tree_code code,
    1010                 :             :                                 tree *op0_out, tree *op1_out)
    1011                 :             : {
    1012                 :    89328755 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1013                 :     9964193 :   if (!loop_info)  /* VINFO is not a loop_vec_info.  */
    1014                 :             :     return false;
    1015                 :             : 
    1016                 :     9964193 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1017                 :    10795238 :   if (!assign || gimple_assign_rhs_code (assign) != code)  /* Only assignments with rhs code CODE qualify.  */
    1018                 :             :     return false;
    1019                 :             : 
    1020                 :             :   /* We don't allow changing the order of the computation in the inner-loop
    1021                 :             :      when doing outer-loop vectorization.  */
    1022                 :     2095271 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1023                 :    91323986 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1024                 :             :     return false;
    1025                 :             : 
    1026                 :     2047028 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    1027                 :             :     {
    1028                 :      102829 :       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1029                 :      102829 :                                        code))  /* A fold-left reduction must not be reassociated.  */
    1030                 :             :         return false;
    1031                 :             :     }
    1032                 :     1944199 :   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)  /* Neither a reduction def nor in a reduction group.  */
    1033                 :             :     return false;
    1034                 :             : 
    1035                 :      100040 :   *op0_out = gimple_assign_rhs1 (assign);
    1036                 :      100040 :   *op1_out = gimple_assign_rhs2 (assign);
    1037                 :      100040 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1038                 :       31487 :     std::swap (*op0_out, *op1_out);  /* Presumably so that *OP1_OUT is the reduction operand -- confirm.  */
    1039                 :             :   return true;
    1040                 :             : }
    1041                 :             : 
    1042                 :             : /* match.pd function to match
    1043                 :             :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1044                 :             :    with conditions:
    1045                 :             :    1) @1, @2, c, d, a, b are all integral type.
    1046                 :             :    2) There's single_use for both @1 and @2.
    1047                 :             :    3) a, c have same precision.
    1048                 :             :    4) c and @1 have different precision.
    1049                 :             :    5) c, d are the same type or they can differ in sign when convert is
    1050                 :             :    truncation.
    1051                 :             : 
    1052                 :             :    record a and c and d and @3.  */
    1053                 :             : 
    1054                 :             : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1055                 :             : 
    1056                 :             : /* Function vect_recog_cond_expr_convert_pattern
    1057                 :             : 
    1058                 :             :    Try to find the following pattern:
    1059                 :             : 
    1060                 :             :    TYPE_AB A,B;
    1061                 :             :    TYPE_CD C,D;
    1062                 :             :    TYPE_E E;
    1063                 :             :    TYPE_E op_true = (TYPE_E) A;
    1064                 :             :    TYPE_E op_false = (TYPE_E) B;
    1065                 :             : 
    1066                 :             :    E = C cmp D ? op_true : op_false;
    1067                 :             : 
    1068                 :             :    where
    1069                 :             :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1070                 :             :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1071                 :             :    single_use of op_true and op_false.
    1072                 :             :    A and B could differ in sign when (TYPE_E) A is a truncation.
    1073                 :             : 
    1074                 :             :    Input:
    1075                 :             : 
    1076                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1077                 :             :    Here it starts with E = C cmp D ? op_true : op_false;
    1078                 :             : 
    1079                 :             :    Output:
    1080                 :             : 
    1081                 :             :    TYPE_AB E' = C cmp D ? A : B;
    1082                 :             :    TYPE_E E = (TYPE_E) E';
    1083                 :             : 
    1084                 :             :    There may be an extra nop_convert of B to handle different signedness.
    1085                 :             : 
    1086                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1087                 :             : 
    1088                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1089                 :             :    stmts that constitute the pattern. In this case it will be:
    1090                 :             :    E = (TYPE_E) E';
    1091                 :             :    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    1092                 :             : 
    1093                 :             : static gimple *
    1094                 :    29848350 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1095                 :             :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1096                 :             : {
    1097                 :    29848350 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1098                 :    20419159 :   tree lhs, match[4], temp, type, new_lhs, op2, op1;  /* match[1], match[2] and match[3] are read below as A, B and the comparison.  */
    1099                 :    20419159 :   gimple *cond_stmt;
    1100                 :    20419159 :   gimple *pattern_stmt;
    1101                 :    29848265 :   enum tree_code code = NOP_EXPR;  /* Code of the final conversion; refined below when floats are involved.  */
    1102                 :             : 
    1103                 :    20419159 :   if (!last_stmt)
    1104                 :             :     return NULL;
    1105                 :             : 
    1106                 :    20419159 :   lhs = gimple_assign_lhs (last_stmt);
    1107                 :             : 
    1108                 :             :   /* Find E = C cmp D ? (TYPE_E) A : (TYPE_E) B;
    1109                 :             :      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
    1110                 :    20419159 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1111                 :             :     return NULL;
    1112                 :             : 
    1113                 :          85 :   if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))  /* Float result: from an integer (FLOAT_EXPR) or another float (CONVERT_EXPR).  */
    1114                 :          24 :     code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
    1115                 :          61 :   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))  /* Float source, non-float result.  */
    1116                 :           0 :     code = FIX_TRUNC_EXPR;
    1117                 :             : 
    1118                 :          85 :   op1 = match[1];
    1119                 :          85 :   op2 = match[2];
    1120                 :          85 :   type = TREE_TYPE (op1);
    1121                 :             :   /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
    1122                 :             :      SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
    1123                 :             :      Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
    1124                 :             :      or CONVERT_EXPR.  */
    1125                 :          85 :   if (TREE_CODE (op1) == REAL_CST)
    1126                 :             :     {
    1127                 :          12 :       op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
    1128                 :          12 :       type = TREE_TYPE (op2);  /* The constant was folded to OP2's type, so build the COND_EXPR in that type.  */
    1129                 :          12 :       if (op1 == NULL_TREE)
    1130                 :             :         return NULL;
    1131                 :             :     }
    1132                 :          73 :   else if (TREE_CODE (op2) == REAL_CST)
    1133                 :             :     {
    1134                 :           0 :       op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);  /* NOTE(review): FLOAT_EXPR here, but CONVERT_EXPR for op1 and in the comment above -- confirm intended.  */
    1135                 :           0 :       if (op2 == NULL_TREE)
    1136                 :             :         return NULL;
    1137                 :             :     }
    1138                 :          73 :   else if (code == NOP_EXPR)
    1139                 :             :     {
    1140                 :          61 :       if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))  /* B's sign differs from A's: cast B to A's type first.  */
    1141                 :             :         {
    1142                 :          24 :           op2 = vect_recog_temp_ssa_var (type, NULL);
    1143                 :          24 :           gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1144                 :          24 :           append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
    1145                 :             :                                   get_vectype_for_scalar_type (vinfo, type));
    1146                 :             :         }
    1147                 :             :     }
    1148                 :             : 
    1149                 :          85 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1150                 :             : 
    1151                 :          85 :   temp = vect_recog_temp_ssa_var (type, NULL);  /* E' in the header comment.  */
    1152                 :          85 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1153                 :             :                                                  op1, op2));
    1154                 :          85 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
    1155                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    1156                 :          85 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1157                 :          85 :   pattern_stmt = gimple_build_assign (new_lhs, code, temp);  /* The single conversion applied to the selected value.  */
    1158                 :          85 :   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
    1159                 :             : 
    1160                 :          85 :   if (dump_enabled_p ())
    1161                 :          27 :     dump_printf_loc (MSG_NOTE, vect_location,
    1162                 :             :                      "created pattern stmt: %G", pattern_stmt);
    1163                 :             :   return pattern_stmt;
    1164                 :             : }
    1165                 :             : 
    1166                 :             : /* Function vect_recog_dot_prod_pattern
    1167                 :             : 
    1168                 :             :    Try to find the following pattern:
    1169                 :             : 
    1170                 :             :      type1a x_t
    1171                 :             :      type1b y_t;
    1172                 :             :      TYPE1 prod;
    1173                 :             :      TYPE2 sum = init;
    1174                 :             :    loop:
    1175                 :             :      sum_0 = phi <init, sum_1>
    1176                 :             :      S1  x_t = ...
    1177                 :             :      S2  y_t = ...
    1178                 :             :      S3  x_T = (TYPE1) x_t;
    1179                 :             :      S4  y_T = (TYPE1) y_t;
    1180                 :             :      S5  prod = x_T * y_T;
    1181                 :             :      [S6  prod = (TYPE2) prod;  #optional]
    1182                 :             :      S7  sum_1 = prod + sum_0;
    1183                 :             : 
    1184                 :             :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1185                 :             :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1186                 :             :    'type1a' and 'type1b' can differ.
    1187                 :             : 
    1188                 :             :    Input:
    1189                 :             : 
    1190                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1191                 :             :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1192                 :             :    will be detected.
    1193                 :             : 
    1194                 :             :    Output:
    1195                 :             : 
    1196                 :             :    * TYPE_OUT: The type of the output  of this pattern.
    1197                 :             : 
    1198                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1199                 :             :    stmts that constitute the pattern. In this case it will be:
    1200                 :             :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1201                 :             : 
    1202                 :             :    Note: The dot-prod idiom is a widening reduction pattern that is
    1203                 :             :          vectorized without preserving all the intermediate results. It
    1204                 :             :          produces only N/2 (widened) results (by summing up pairs of
    1205                 :             :          intermediate results) rather than all N results.  Therefore, we
    1206                 :             :          cannot allow this pattern when we want to get all the results and in
    1207                 :             :          the correct order (as is the case when this computation is in an
    1208                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    1209                 :             : 
    1210                 :             : static gimple *
    1211                 :    29776605 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1212                 :             :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1213                 :             : {
    1214                 :    29776605 :   tree oprnd0, oprnd1;
    1215                 :    29776605 :   gimple *last_stmt = stmt_vinfo->stmt;
    1216                 :    29776605 :   tree type, half_type;
    1217                 :    29776605 :   gimple *pattern_stmt;
    1218                 :    29776605 :   tree var;
    1219                 :             : 
    1220                 :             :   /* Look for the following pattern
    1221                 :             :           DX = (TYPE1) X;
    1222                 :             :           DY = (TYPE1) Y;
    1223                 :             :           DPROD = DX * DY;
    1224                 :             :           DDPROD = (TYPE2) DPROD;
    1225                 :             :           sum_1 = DDPROD + sum_0;
    1226                 :             :      In which
    1227                 :             :      - DX is double the size of X
    1228                 :             :      - DY is double the size of Y
    1229                 :             :      - DX, DY, DPROD all have the same type but the sign
    1230                 :             :        between X, Y and DPROD can differ.
    1231                 :             :      - sum is the same size of DPROD or bigger
    1232                 :             :      - sum has been recognized as a reduction variable.
    1233                 :             : 
    1234                 :             :      This is equivalent to:
    1235                 :             :        DPROD = X w* Y;          #widen mult
    1236                 :             :        sum_1 = DPROD w+ sum_0;  #widen summation
    1237                 :             :      or
    1238                 :             :        DPROD = X w* Y;          #widen mult
    1239                 :             :        sum_1 = DPROD + sum_0;   #summation
    1240                 :             :    */
    1241                 :             : 
    1242                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1243                 :             :      of the above pattern.  */
    1244                 :             : 
    1245                 :    29776605 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1246                 :             :                                        &oprnd0, &oprnd1))
    1247                 :             :     return NULL;
    1248                 :             : 
    1249                 :       33704 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1250                 :             : 
    1251                 :       33704 :   vect_unpromoted_value unprom_mult;
    1252                 :       33704 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1253                 :             : 
    1254                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1255                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1256                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1257                 :             :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1258                 :       33704 :   if (!oprnd0)
    1259                 :             :     return NULL;
    1260                 :             : 
    1261                 :       25462 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1262                 :       25462 :   if (!mult_vinfo)
    1263                 :             :     return NULL;
    1264                 :             : 
    1265                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1266                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1267                 :       74241 :   vect_unpromoted_value unprom0[2];
    1268                 :       24747 :   enum optab_subtype subtype = optab_vector;
    1269                 :       24747 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1270                 :             :                              false, 2, unprom0, &half_type, &subtype))
    1271                 :             :     return NULL;
    1272                 :             : 
    1273                 :             :   /* If there are two widening operations, make sure they agree on the sign
    1274                 :             :      of the extension.  The result of an optab_vector_mixed_sign operation
    1275                 :             :      is signed; otherwise, the result has the same sign as the operands.  */
    1276                 :        1000 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1277                 :        1614 :       && (subtype == optab_vector_mixed_sign
    1278                 :         614 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1279                 :         426 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1280                 :             :     return NULL;
    1281                 :             : 
    1282                 :         892 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1283                 :             : 
    1284                 :             :   /* If the inputs have mixed signs, canonicalize on using the signed
    1285                 :             :      input type for analysis.  This also helps when emulating mixed-sign
    1286                 :             :      operations using signed operations.  */
    1287                 :         892 :   if (subtype == optab_vector_mixed_sign)
    1288                 :         150 :     half_type = signed_type_for (half_type);
    1289                 :             : 
    1290                 :         892 :   tree half_vectype;
    1291                 :         892 :   if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1292                 :             :                                         type_out, &half_vectype, subtype))
    1293                 :             :     {
    1294                 :             :       /* We can emulate a mixed-sign dot-product using a sequence of
    1295                 :             :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1296                 :         475 :       if (subtype != optab_vector_mixed_sign
    1297                 :         475 :           || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
    1298                 :             :                                                DOT_PROD_EXPR, half_type,
    1299                 :             :                                                type_out, &half_vectype,
    1300                 :             :                                                optab_vector))
    1301                 :         469 :         return NULL;
    1302                 :             : 
    1303                 :           6 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1304                 :             :                                                *type_out);
    1305                 :             :     }
    1306                 :             : 
    1307                 :             :   /* Get the inputs in the appropriate types.  */
    1308                 :         423 :   tree mult_oprnd[2];
    1309                 :         423 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1310                 :             :                        unprom0, half_vectype, subtype);
    1311                 :             : 
    1312                 :         423 :   var = vect_recog_temp_ssa_var (type, NULL);
    1313                 :         423 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1314                 :             :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1315                 :             : 
    1316                 :         423 :   return pattern_stmt;
    1317                 :             : }
    1318                 :             : 
    1319                 :             : 
    1320                 :             : /* Function vect_recog_sad_pattern
    1321                 :             : 
    1322                 :             :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1323                 :             : 
    1324                 :             :      type x_t, y_t;
    1325                 :             :      signed TYPE1 diff, abs_diff;
    1326                 :             :      TYPE2 sum = init;
    1327                 :             :    loop:
    1328                 :             :      sum_0 = phi <init, sum_1>
    1329                 :             :      S1  x_t = ...
    1330                 :             :      S2  y_t = ...
    1331                 :             :      S3  x_T = (TYPE1) x_t;
    1332                 :             :      S4  y_T = (TYPE1) y_t;
    1333                 :             :      S5  diff = x_T - y_T;
    1334                 :             :      S6  abs_diff = ABS_EXPR <diff>;
    1335                 :             :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1336                 :             :      S8  sum_1 = abs_diff + sum_0;
    1337                 :             : 
    1338                 :             :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1339                 :             :    same size as 'TYPE1' or bigger.  This is a special case of a reduction
    1340                 :             :    computation.  An IFN_ABD or IFN_VEC_WIDEN_ABD call may stand in for S3-S6.
    1341                 :             : 
    1342                 :             :    Input:
    1343                 :             : 
    1344                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1345                 :             :    example, when this function is called with S8, the pattern
    1346                 :             :    {S3,S4,S5,S6,S7,S8} will be detected.
    1347                 :             : 
    1348                 :             :    Output:
    1349                 :             : 
    1350                 :             :    * TYPE_OUT: The type of the output of this pattern.
    1351                 :             : 
    1352                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1353                 :             :    stmts that constitute the pattern.  In this case it will be:
    1354                 :             :         SAD_EXPR <x_t, y_t, sum_0>
    1355                 :             :   */
    1356                 :             : 
    1357                 :             : static gimple *
    1358                 :    29776188 : vect_recog_sad_pattern (vec_info *vinfo,
    1359                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1360                 :             : {
    1361                 :    29776188 :   gimple *last_stmt = stmt_vinfo->stmt;
    1362                 :    29776188 :   tree half_type;
    1363                 :             : 
    1364                 :             :   /* Look for the following pattern
    1365                 :             :           DX = (TYPE1) X;
    1366                 :             :           DY = (TYPE1) Y;
    1367                 :             :           DDIFF = DX - DY;
    1368                 :             :           DAD = ABS_EXPR <DDIFF>;
    1369                 :             :           [DAD = (TYPE2) DAD;  #optional]
    1370                 :             :           sum_1 = DAD + sum_0;
    1371                 :             :      In which
    1372                 :             :      - DX is at least double the size of X
    1373                 :             :      - DY is at least double the size of Y
    1374                 :             :      - DX, DY, DDIFF, DAD all have the same type
    1375                 :             :      - sum is the same size as DAD or bigger
    1376                 :             :      - sum has been recognized as a reduction variable.
    1377                 :             : 
    1378                 :             :      This is equivalent to:
    1379                 :             :        DDIFF = X w- Y;          #widen sub
    1380                 :             :        DAD = ABS_EXPR <DDIFF>;
    1381                 :             :        sum_1 = DAD w+ sum_0;    #widen summation
    1382                 :             :      or
    1383                 :             :        DDIFF = X w- Y;          #widen sub
    1384                 :             :        DAD = ABS_EXPR <DDIFF>;
    1385                 :             :        sum_1 = DAD + sum_0;     #summation
    1386                 :             :    */
    1387                 :             : 
    1388                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1389                 :             :      of the above pattern.  */
    1390                 :             : 
    1391                 :    29776188 :   tree plus_oprnd0, plus_oprnd1;
    1392                 :    29776188 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1393                 :             :                                        &plus_oprnd0, &plus_oprnd1))
    1394                 :             :     return NULL;
    1395                 :             : 
    1396                 :       33281 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1397                 :             : 
    1398                 :             :   /* Any non-truncating sequence of conversions is OK here, since
    1399                 :             :      with a successful match, the result of the ABS(U) is known to fit
    1400                 :             :      within the nonnegative range of the result type.  (It cannot be the
    1401                 :             :      negative of the minimum signed value due to the range of the widening
    1402                 :             :      MINUS_EXPR.)  */
    1403                 :       33281 :   vect_unpromoted_value unprom_abs;
    1404                 :       33281 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1405                 :             :                                                       &unprom_abs);
    1406                 :             : 
    1407                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1408                 :             :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1409                 :             :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1410                 :             :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1411                 :             : 
    1412                 :       33281 :   if (!plus_oprnd0)
    1413                 :             :     return NULL;
    1414                 :             : 
    1415                 :       25039 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1416                 :       25039 :   if (!abs_stmt_vinfo)
    1417                 :             :     return NULL;
    1418                 :             : 
    1419                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
    1420                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1421                 :       24324 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1422                 :       72972 :   vect_unpromoted_value unprom[2];
    1423                 :             :   /* Without an assignment, only an IFN_ABD/IFN_VEC_WIDEN_ABD call matches.  */
    1424                 :       24324 :   if (!abs_stmt)
    1425                 :             :     {
    1426                 :    29776234 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1427                 :         272 :       if (!abd_stmt
    1428                 :         272 :           || !gimple_call_internal_p (abd_stmt)
    1429                 :           0 :           || gimple_call_num_args (abd_stmt) != 2)
    1430                 :             :         return NULL;
    1431                 :             : 
    1432                 :           0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1433                 :           0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1434                 :             : 
    1435                 :           0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
    1436                 :           0 :           || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1437                 :             :         {
    1438                 :           0 :           unprom[0].op = abd_oprnd0;
    1439                 :           0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1440                 :           0 :           unprom[1].op = abd_oprnd1;
    1441                 :           0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1442                 :             :         }
    1443                 :             :       else
    1444                 :             :         return NULL;
    1445                 :             :       /* The first call operand's type serves as the SAD input type.  */
    1446                 :           0 :       half_type = unprom[0].type;
    1447                 :             :     }
    1448                 :       24001 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1449                 :             :                                             unprom, NULL))
    1450                 :             :     return NULL;
    1451                 :             : 
    1452                 :         377 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1453                 :             : 
    1454                 :         377 :   tree half_vectype;
    1455                 :         377 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1456                 :             :                                         type_out, &half_vectype))
    1457                 :             :     return NULL;
    1458                 :             : 
    1459                 :             :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1460                 :         226 :   tree sad_oprnd[2];
    1461                 :         226 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1462                 :             :                        unprom, half_vectype);
    1463                 :             : 
    1464                 :         226 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1465                 :         226 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1466                 :             :                                               sad_oprnd[1], plus_oprnd1);
    1467                 :             : 
    1468                 :         226 :   return pattern_stmt;
    1469                 :             : }
    1470                 :             : 
    1471                 :             : /* Function vect_recog_abd_pattern
    1472                 :             : 
    1473                 :             :    Try to find the following ABsolute Difference (ABD) or
    1474                 :             :    widening ABD (WIDEN_ABD) pattern:
    1475                 :             : 
    1476                 :             :    TYPE1 x;
    1477                 :             :    TYPE2 y;
    1478                 :             :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1479                 :             :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1480                 :             :    TYPE3 diff = x_cast - y_cast;
    1481                 :             :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1482                 :             :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1483                 :             : 
    1484                 :             :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1485                 :             :    twice as wide as TYPE3.
    1486                 :             : 
    1487                 :             :    Input:
    1488                 :             : 
    1489                 :             :    * STMT_VINFO: The stmt from which the pattern search begins
    1490                 :             : 
    1491                 :             :    Output:
    1492                 :             : 
    1493                 :             :    * TYPE_OUT: The type of the output of this pattern
    1494                 :             : 
    1495                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1496                 :             :      stmts that constitute the pattern, principally:
    1497                 :             :         out = IFN_ABD (x, y)
    1498                 :             :         out = IFN_VEC_WIDEN_ABD (x, y)
    1499                 :             :  */
    1500                 :             : 
    1501                 :             : static gimple *
    1502                 :    29719175 : vect_recog_abd_pattern (vec_info *vinfo,
    1503                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1504                 :             : {
    1505                 :    50009293 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1506                 :    20290118 :   if (!last_stmt)
    1507                 :             :     return NULL;
    1508                 :             : 
    1509                 :    20290118 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1510                 :             : 
    1511                 :    60870354 :   vect_unpromoted_value unprom[2];
    1512                 :    20290118 :   gassign *diff_stmt = NULL;
    1513                 :    20290118 :   tree abd_in_type;
    1514                 :    20290118 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
    1515                 :             :                                        unprom, &diff_stmt))
    1516                 :             :     {
    1517                 :             :       /* We cannot try further without having a non-widening MINUS.  */
    1518                 :    20289219 :       if (!diff_stmt)
    1519                 :             :         return NULL;
    1520                 :             : 
    1521                 :        1291 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1522                 :        1291 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1523                 :        1291 :       abd_in_type = signed_type_for (out_type);
    1524                 :             :     }
    1525                 :             :   /* Default to a non-widening ABD whose result has the input type.  */
    1526                 :        2190 :   tree abd_out_type = abd_in_type;
    1527                 :             : 
    1528                 :        2190 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1529                 :        2190 :   if (!vectype_in)
    1530                 :             :     return NULL;
    1531                 :             : 
    1532                 :        1908 :   internal_fn ifn = IFN_ABD;
    1533                 :        1908 :   tree vectype_out = vectype_in;
    1534                 :             :   /* Try IFN_VEC_WIDEN_ABD when a double-width result is called for.  */
    1535                 :        1908 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1536                 :        1908 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1537                 :             :     {
    1538                 :         792 :       tree mid_type
    1539                 :         792 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1540                 :         792 :                                           TYPE_UNSIGNED (abd_in_type));
    1541                 :         792 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1542                 :             : 
    1543                 :         792 :       code_helper dummy_code;
    1544                 :         792 :       int dummy_int;
    1545                 :         792 :       auto_vec<tree> dummy_vec;
    1546                 :         792 :       if (mid_vectype
    1547                 :         792 :           && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
    1548                 :             :                                              stmt_vinfo, mid_vectype,
    1549                 :             :                                              vectype_in,
    1550                 :             :                                              &dummy_code, &dummy_code,
    1551                 :             :                                              &dummy_int, &dummy_vec))
    1552                 :             :         {
    1553                 :           0 :           ifn = IFN_VEC_WIDEN_ABD;
    1554                 :           0 :           abd_out_type = mid_type;
    1555                 :           0 :           vectype_out = mid_vectype;
    1556                 :             :         }
    1557                 :         792 :     }
    1558                 :             :   /* Plain IFN_ABD needs direct target support for VECTYPE_IN.  */
    1559                 :         792 :   if (ifn == IFN_ABD
    1560                 :        1908 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1561                 :             :                                           OPTIMIZE_FOR_SPEED))
    1562                 :             :     return NULL;
    1563                 :             : 
    1564                 :           0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1565                 :             : 
    1566                 :           0 :   tree abd_oprnds[2];
    1567                 :           0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1568                 :             :                        abd_in_type, unprom, vectype_in);
    1569                 :             : 
    1570                 :           0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1571                 :             : 
    1572                 :           0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1573                 :           0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1574                 :             :                                                 abd_oprnds[0], abd_oprnds[1]);
    1575                 :           0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1576                 :           0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1577                 :             :   /* A signed non-widened result narrower than OUT_TYPE goes via unsigned.  */
    1578                 :           0 :   gimple *stmt = abd_stmt;
    1579                 :           0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1580                 :           0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1581                 :           0 :       && !TYPE_UNSIGNED (abd_out_type))
    1582                 :             :     {
    1583                 :           0 :       tree unsign = unsigned_type_for (abd_out_type);
    1584                 :           0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1585                 :           0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1586                 :             :     }
    1587                 :             : 
    1588                 :           0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1589                 :             : }
    1590                 :             : 
    1591                 :             : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1592                 :             :    so that it can be treated as though it had the form:
    1593                 :             : 
    1594                 :             :       A_TYPE a;
    1595                 :             :       B_TYPE b;
    1596                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1597                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1598                 :             :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1599                 :             :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1600                 :             :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1601                 :             : 
    1602                 :             :    Try to replace the pattern with:
    1603                 :             : 
    1604                 :             :       A_TYPE a;
    1605                 :             :       B_TYPE b;
    1606                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1607                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1608                 :             :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1609                 :             :     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
    1610                 :             : 
    1611                 :             :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1612                 :             : 
    1613                 :             :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1614                 :             :    name of the pattern being matched, for dump purposes.  */
    1615                 :             : 
    1616                 :             : static gimple *
    1617                 :   119612938 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1618                 :             :                              stmt_vec_info last_stmt_info, tree *type_out,
    1619                 :             :                              tree_code orig_code, code_helper wide_code,
    1620                 :             :                              bool shift_p, const char *name)
    1621                 :             : {
    1622                 :   119612938 :   gimple *last_stmt = last_stmt_info->stmt;
    1623                 :             : 
    1624                 :   358838814 :   vect_unpromoted_value unprom[2];
    1625                 :   119612938 :   tree half_type;
    1626                 :   119612938 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1627                 :             :                              shift_p, 2, unprom, &half_type))
    1628                 :             : 
    1629                 :             :     return NULL;
    1630                 :             : 
    1631                 :             :   /* Pattern detected.  */
    1632                 :      294607 :   vect_pattern_detected (name, last_stmt);
    1633                 :             : 
    1634                 :      294607 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1635                 :      294607 :   tree itype = type;
    1636                 :      294607 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1637                 :      294607 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1638                 :      204181 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1639                 :      204181 :                                             TYPE_UNSIGNED (half_type));
    1640                 :             : 
    1641                 :             :   /* Check target support  */
    1642                 :      294607 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1643                 :      294607 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1644                 :      294607 :   tree ctype = itype;
    1645                 :      294607 :   tree vecctype = vecitype;
    1646                 :      294607 :   if (orig_code == MINUS_EXPR
    1647                 :        7920 :       && TYPE_UNSIGNED (itype)
    1648                 :      298253 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1649                 :             :     {
    1650                 :             :       /* Subtraction is special, even if half_type is unsigned and no matter
    1651                 :             :          whether type is signed or unsigned, if type is wider than itype,
    1652                 :             :          we need to sign-extend from the widening operation result to the
    1653                 :             :          result type.
    1654                 :             :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1655                 :             :          itype unsigned short and type either int or unsigned int.
    1656                 :             :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1657                 :             :          (unsigned short) 0xffff, but for type int we want the result -1
    1658                 :             :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1659                 :         590 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
    1660                 :         590 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1661                 :             :     }
    1662                 :             : 
    1663                 :      294607 :   code_helper dummy_code;
    1664                 :      294607 :   int dummy_int;
    1665                 :      294607 :   auto_vec<tree> dummy_vec;
    1666                 :      294607 :   if (!vectype
    1667                 :      294607 :       || !vecitype
    1668                 :      231879 :       || !vecctype
    1669                 :      526486 :       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
    1670                 :             :                                           vecitype, vectype,
    1671                 :             :                                           &dummy_code, &dummy_code,
    1672                 :             :                                           &dummy_int, &dummy_vec))
    1673                 :      199565 :     return NULL;
    1674                 :             : 
    1675                 :       95042 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1676                 :       95042 :   if (!*type_out)
    1677                 :             :     return NULL;
    1678                 :             : 
    1679                 :       95042 :   tree oprnd[2];
    1680                 :       95042 :   vect_convert_inputs (vinfo, last_stmt_info,
    1681                 :             :                        2, oprnd, half_type, unprom, vectype);
    1682                 :             : 
    1683                 :       95042 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1684                 :       95042 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1685                 :             : 
    1686                 :       95042 :   if (vecctype != vecitype)
    1687                 :           0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1688                 :             :                                         pattern_stmt, vecitype);
    1689                 :             : 
    1690                 :       95042 :   return vect_convert_output (vinfo, last_stmt_info,
    1691                 :       95042 :                               type, pattern_stmt, vecctype);
    1692                 :      294607 : }
    1693                 :             : 
    1694                 :             : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1695                 :             :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.
                         :             : 
                         :             :    This is a thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are passed
                         :             :    through unchanged and whatever vect_recog_widen_op_pattern returns is
                         :             :    returned as is.  The string argument is this recognizer's own name;
                         :             :    the meaning of the boolean argument (false here) is defined by
                         :             :    vect_recog_widen_op_pattern.  */
    1696                 :             : 
    1697                 :             : static gimple *
    1698                 :    29799617 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1699                 :             :                                tree *type_out)
    1700                 :             : {
    1701                 :    29799617 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1702                 :    29799617 :                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
    1703                 :    29799617 :                                       "vect_recog_widen_mult_pattern");
    1704                 :             : }
    1705                 :             : 
    1706                 :             : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1707                 :             :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.
                         :             : 
                         :             :    This is a thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are passed
                         :             :    through unchanged and whatever vect_recog_widen_op_pattern returns is
                         :             :    returned as is.  The string argument is this recognizer's own name;
                         :             :    the meaning of the boolean argument (false here) is defined by
                         :             :    vect_recog_widen_op_pattern.  */
    1708                 :             : 
    1709                 :             : static gimple *
    1710                 :    30018576 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1711                 :             :                                tree *type_out)
    1712                 :             : {
    1713                 :    30018576 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1714                 :    30018576 :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
    1715                 :    30018576 :                                       false, "vect_recog_widen_plus_pattern");
    1716                 :             : }
    1717                 :             : 
    1718                 :             : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1719                 :             :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.
                         :             : 
                         :             :    This is a thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are passed
                         :             :    through unchanged and whatever vect_recog_widen_op_pattern returns is
                         :             :    returned as is.  MINUS_EXPR is the one code that routine treats
                         :             :    specially: when the intermediate type is unsigned and the final type
                         :             :    is wider, the widened result is first converted to a signed type of
                         :             :    the same precision so that the extension to the final type is a sign
                         :             :    extension.  */
    1720                 :             : static gimple *
    1721                 :    30018576 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1722                 :             :                                tree *type_out)
    1723                 :             : {
    1724                 :    30018576 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1725                 :    30018576 :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
    1726                 :    30018576 :                                       false, "vect_recog_widen_minus_pattern");
    1727                 :             : }
    1728                 :             : 
    1729                 :             : /* Try to detect abd on widened inputs, converting IFN_ABD
    1730                 :             :    to IFN_VEC_WIDEN_ABD.
                         :             : 
                         :             :    STMT_VINFO is expected to be a conversion from an unsigned integral
                         :             :    type to an integral type of exactly twice the precision, whose operand
                         :             :    (after looking through a possible promotion) is defined by a call to
                         :             :    the internal function IFN_ABD.
                         :             : 
                         :             :    On a match, *TYPE_OUT is set to the vector type of the conversion
                         :             :    result and a new IFN_VEC_WIDEN_ABD call on the two operands of the
                         :             :    ABD call is returned.  Otherwise NULL is returned and *TYPE_OUT is
                         :             :    left untouched.  */
    1731                 :             : static gimple *
    1732                 :    30018576 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1733                 :             :                               tree *type_out)
    1734                 :             : {
    1735                 :    30018576 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1736                 :    27879595 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1737                 :             :     return NULL;
    1738                 :             : 
    1739                 :     2816301 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1740                 :             :   /* Require an unsigned integral source type and an integral destination
                         :             :      type of exactly twice its precision.  */
    1741                 :     2816301 :   tree in_type = TREE_TYPE (last_rhs);
    1742                 :     2816301 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1743                 :     2816301 :   if (!INTEGRAL_TYPE_P (in_type)
    1744                 :     2527075 :       || !INTEGRAL_TYPE_P (out_type)
    1745                 :     2432523 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1746                 :     3393758 :       || !TYPE_UNSIGNED (in_type))
    1747                 :             :     return NULL;
    1748                 :             :   /* Look through a possible promotion of the converted operand; the value
                         :             :      found must still have the precision of IN_TYPE.  */
    1749                 :      204873 :   vect_unpromoted_value unprom;
    1750                 :      204873 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1751                 :      204873 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1752                 :             :     return NULL;
    1753                 :             :   /* OP must have a definition that vect_get_internal_def can return ...  */
    1754                 :      204298 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1755                 :      204298 :   if (!abd_pattern_vinfo)
    1756                 :             :     return NULL;
    1757                 :             :   /* ... and that definition must be a call to the internal function IFN_ABD.  */
    1758                 :    30027529 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1759                 :        8953 :   if (!abd_stmt
    1760                 :        8953 :       || !gimple_call_internal_p (abd_stmt)
    1761                 :         246 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1762                 :             :     return NULL;
    1763                 :             :   /* NOTE(review): neither vector type is checked for NULL before being
                         :             :      handed to supportable_widening_operation -- confirm that this is
                         :             :      guaranteed by the earlier ABD pattern or tolerated by the callee.  */
    1764                 :           0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1765                 :           0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
    1766                 :             :   /* The dummy_* variables only receive outputs that are not used here.  */
    1767                 :           0 :   code_helper dummy_code;
    1768                 :           0 :   int dummy_int;
    1769                 :           0 :   auto_vec<tree> dummy_vec;
    1770                 :           0 :   if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
    1771                 :             :                                        vectype_out, vectype_in,
    1772                 :             :                                        &dummy_code, &dummy_code,
    1773                 :             :                                        &dummy_int, &dummy_vec))
    1774                 :             :     return NULL;
    1775                 :             : 
    1776                 :           0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1777                 :             : 
    1778                 :           0 :   *type_out = vectype_out;
    1779                 :             :   /* Build WIDEN_ABD_RESULT = .VEC_WIDEN_ABD (op0, op1) from the ABD call's
                         :             :      own two arguments, reusing the location of LAST_STMT.  */
    1780                 :           0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1781                 :           0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1782                 :           0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1783                 :           0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1784                 :             :                                                       abd_oprnd0, abd_oprnd1);
    1785                 :           0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1786                 :           0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1787                 :           0 :   return widen_abd_stmt;
    1788                 :           0 : }
    1789                 :             : 
    1790                 :             : /* Function vect_recog_ctz_ffs_pattern
    1791                 :             : 
    1792                 :             :    Try to find the following pattern:
    1793                 :             : 
    1794                 :             :    TYPE1 A;
    1795                 :             :    TYPE1 B;
    1796                 :             : 
    1797                 :             :    B = __builtin_ctz{,l,ll} (A);
    1798                 :             : 
    1799                 :             :    or
    1800                 :             : 
    1801                 :             :    B = __builtin_ffs{,l,ll} (A);
    1802                 :             : 
    1803                 :             :    Input:
    1804                 :             : 
    1805                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1806                 :             :    here it starts with B = __builtin_* (A);
    1807                 :             : 
    1808                 :             :    Output:
    1809                 :             : 
    1810                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1811                 :             : 
    1812                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1813                 :             :    stmts that constitute the pattern, using .CLZ, .POPCOUNT or (for ffs
                         :             :    only) .CTZ internal function calls.  Helper statements are appended to
                         :             :    the pattern definition sequence of STMT_VINFO.  NULL is returned when
                         :             :    the statement does not match, when a needed vector type is missing,
                         :             :    when the target supports the original function on the vector type
                         :             :    directly, or when no replacement function is supported.  */
    1814                 :             : 
    1815                 :             : static gimple *
    1816                 :    29776073 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1817                 :             :                             tree *type_out)
    1818                 :             : {
    1819                 :    29776073 :   gimple *call_stmt = stmt_vinfo->stmt;
    1820                 :    29776073 :   gimple *pattern_stmt;
    1821                 :    29776073 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1822                 :    29776073 :   tree new_var;
    1823                 :    29776073 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
    1824                 :    29776073 :   bool defined_at_zero = true, defined_at_zero_new = false;
    1825                 :    29776073 :   int val = 0, val_new = 0, val_cmp = 0;
    1826                 :    29776073 :   int prec;
    1827                 :    29776073 :   int sub = 0, add = 0;
    1828                 :    29776073 :   location_t loc;
    1829                 :             :   /* Only calls with one or two arguments and a LHS are of interest.  */
    1830                 :    29776073 :   if (!is_gimple_call (call_stmt))
    1831                 :             :     return NULL;
    1832                 :             : 
    1833                 :     3470090 :   if (gimple_call_num_args (call_stmt) != 1
    1834                 :     3470090 :       && gimple_call_num_args (call_stmt) != 2)
    1835                 :             :     return NULL;
    1836                 :             : 
    1837                 :     1899192 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1838                 :     1899192 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1839                 :     1899192 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1840                 :     1899192 :   if (!lhs_oprnd)
    1841                 :             :     return NULL;
    1842                 :      943496 :   lhs_type = TREE_TYPE (lhs_oprnd);
    1843                 :      943496 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1844                 :      326817 :       || !INTEGRAL_TYPE_P (rhs_type)
    1845                 :       51161 :       || !type_has_mode_precision_p (rhs_type)
    1846                 :      993083 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1847                 :      905019 :     return NULL;
    1848                 :             :   /* Record which function is being replaced.  Only an internal .CTZ call
                         :             :      with two arguments carries a value for a zero input (its second
                         :             :      argument, stored in VAL); any other CTZ form is treated as undefined
                         :             :      at zero.  FFS keeps DEFINED_AT_ZERO set with VAL == 0.  */
    1849                 :       38477 :   switch (gimple_call_combined_fn (call_stmt))
    1850                 :             :     {
    1851                 :        1121 :     CASE_CFN_CTZ:
    1852                 :        1121 :       ifn = IFN_CTZ;
    1853                 :        1121 :       if (!gimple_call_internal_p (call_stmt)
    1854                 :        1121 :           || gimple_call_num_args (call_stmt) != 2)
    1855                 :             :         defined_at_zero = false;
    1856                 :             :       else
    1857                 :          48 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1858                 :             :       break;
    1859                 :             :     CASE_CFN_FFS:
    1860                 :             :       ifn = IFN_FFS;
    1861                 :             :       break;
    1862                 :             :     default:
    1863                 :             :       return NULL;
    1864                 :             :     }
    1865                 :             : 
    1866                 :        1281 :   prec = TYPE_PRECISION (rhs_type);
    1867                 :        1281 :   loc = gimple_location (call_stmt);
    1868                 :             :   /* Both the result type and the argument type need a vector type.  */
    1869                 :        1281 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1870                 :        1281 :   if (!vec_type)
    1871                 :             :     return NULL;
    1872                 :             : 
    1873                 :        1275 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1874                 :        1275 :   if (!vec_rhs_type)
    1875                 :             :     return NULL;
    1876                 :             : 
    1877                 :             :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1878                 :             :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1879                 :             :      popcount<vector_mode>2.  VEC_TYPE is already known to be non-null
                         :             :      here, so only the second half of the test below can trigger.  */
    1880                 :        1047 :   if (!vec_type
    1881                 :        1047 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1882                 :             :                                          OPTIMIZE_FOR_SPEED))
    1883                 :             :     return NULL;
    1884                 :             :   /* Choose the replacement function.  FFS prefers CTZ; otherwise CLZ is
                         :             :      tried.  POPCOUNT is used when neither was selected, or when the
                         :             :      original is defined at zero but the selected function is not.  */
    1885                 :        1047 :   if (ifn == IFN_FFS
    1886                 :        1047 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1887                 :             :                                          OPTIMIZE_FOR_SPEED))
    1888                 :             :     {
    1889                 :           0 :       ifnnew = IFN_CTZ;
    1890                 :           0 :       defined_at_zero_new
    1891                 :           0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1892                 :             :                                      val_new) == 2;
    1893                 :             :     }
    1894                 :        1047 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1895                 :             :                                            OPTIMIZE_FOR_SPEED))
    1896                 :             :     {
    1897                 :          88 :       ifnnew = IFN_CLZ;
    1898                 :          88 :       defined_at_zero_new
    1899                 :          88 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1900                 :             :                                      val_new) == 2;
    1901                 :             :     }
    1902                 :          88 :   if ((ifnnew == IFN_LAST
    1903                 :          88 :        || (defined_at_zero && !defined_at_zero_new))
    1904                 :         959 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1905                 :             :                                          OPTIMIZE_FOR_SPEED))
    1906                 :             :     {
    1907                 :             :       ifnnew = IFN_POPCOUNT;
    1908                 :             :       defined_at_zero_new = true;
    1909                 :             :       val_new = prec;
    1910                 :             :     }
    1911                 :        1011 :   if (ifnnew == IFN_LAST)
    1912                 :             :     return NULL;
    1913                 :             : 
    1914                 :         124 :   vect_pattern_detected ("vect_recog_ctz_ffs_pattern", call_stmt);
    1915                 :             :   /* VAL_CMP tracks the value the replacement sequence produces for a zero
                         :             :      input; it is compared against VAL at the end to decide whether an
                         :             :      explicit fixup for zero is required.  */
    1916                 :         124 :   val_cmp = val_new;
    1917                 :         124 :   if ((ifnnew == IFN_CLZ
    1918                 :         124 :        && defined_at_zero
    1919                 :          60 :        && defined_at_zero_new
    1920                 :          60 :        && val == prec
    1921                 :          31 :        && val_new == prec)
    1922                 :          93 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1923                 :             :     {
    1924                 :             :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1925                 :             :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1926                 :             :       if (ifnnew == IFN_CLZ)
    1927                 :             :         sub = prec;
    1928                 :          56 :       val_cmp = prec;
    1929                 :             :       /* Work on the unsigned variant of the argument type.
                         :             :          NOTE(review): the recomputed VEC_RHS_TYPE is not checked for
                         :             :          NULL and the conversion statement gets no location -- confirm
                         :             :          both are intentional.  */
    1930                 :          56 :       if (!TYPE_UNSIGNED (rhs_type))
    1931                 :             :         {
    1932                 :          12 :           rhs_type = unsigned_type_for (rhs_type);
    1933                 :          12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1934                 :          12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1935                 :          12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1936                 :          12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1937                 :             :                                   vec_rhs_type);
    1938                 :          12 :           rhs_oprnd = new_var;
    1939                 :             :         }
    1940                 :             :       /* M1 = X - 1, written as X + -1.  */
    1941                 :          56 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
    1942                 :          56 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1943                 :             :                                           build_int_cst (rhs_type, -1));
    1944                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1945                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1946                 :             :       /* NEW_VAR = ~X.  */
    1947                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1948                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1949                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1950                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1951                 :          56 :       rhs_oprnd = new_var;
    1952                 :             :       /* NEW_VAR = M1 & ~X, the argument of the replacement call.  */
    1953                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1954                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1955                 :             :                                           m1, rhs_oprnd);
    1956                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1957                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1958                 :          56 :       rhs_oprnd = new_var;
    1959                 :          56 :     }
    1960                 :          68 :   else if (ifnnew == IFN_CLZ)
    1961                 :             :     {
    1962                 :             :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1963                 :             :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1964                 :          57 :       sub = prec - (ifn == IFN_CTZ);
    1965                 :          57 :       val_cmp = sub - val_new;
    1966                 :             : 
    1967                 :          57 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1968                 :          57 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1969                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1970                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1971                 :             : 
    1972                 :          57 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1973                 :          57 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1974                 :             :                                           rhs_oprnd, neg);
    1975                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1976                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1977                 :          57 :       rhs_oprnd = new_var;
    1978                 :             :     }
    1979                 :          11 :   else if (ifnnew == IFN_POPCOUNT)
    1980                 :             :     {
    1981                 :             :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1982                 :             :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).
                         :             :          Only IFN_FFS actually reaches this branch, because POPCOUNT as
                         :             :          a replacement for CTZ is handled by the first branch above.  */
    1983                 :          11 :       sub = prec + (ifn == IFN_FFS);
    1984                 :          11 :       val_cmp = sub;
    1985                 :             : 
    1986                 :          11 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1987                 :          11 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1988                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1989                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1990                 :             : 
    1991                 :          11 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1992                 :          11 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    1993                 :             :                                           rhs_oprnd, neg);
    1994                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1995                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1996                 :          11 :       rhs_oprnd = new_var;
    1997                 :             :     }
    1998                 :           0 :   else if (ifnnew == IFN_CTZ)
    1999                 :             :     {
    2000                 :             :       /* .FFS (X) = .CTZ (X) + 1.  */
    2001                 :           0 :       add = 1;
    2002                 :           0 :       val_cmp++;
    2003                 :             :     }
    2004                 :             : 
    2005                 :             :   /* Create B = .IFNNEW (A).  CLZ and CTZ get the value-at-zero as an
                         :             :      explicit second argument when the target defines one.  */
    2006                 :         124 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2007                 :         124 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
    2008                 :          88 :     pattern_stmt
    2009                 :          88 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    2010                 :             :                                     build_int_cst (integer_type_node,
    2011                 :          88 :                                                    val_new));
    2012                 :             :   else
    2013                 :          36 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    2014                 :         124 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2015                 :         124 :   gimple_set_location (pattern_stmt, loc);
    2016                 :         124 :   *type_out = vec_type;
    2017                 :             :   /* Apply the SUB - result or result + ADD correction chosen above.  */
    2018                 :         124 :   if (sub)
    2019                 :             :     {
    2020                 :          99 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2021                 :          99 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2022                 :          99 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2023                 :          99 :                                           build_int_cst (lhs_type, sub),
    2024                 :             :                                           new_var);
    2025                 :          99 :       gimple_set_location (pattern_stmt, loc);
    2026                 :          99 :       new_var = ret_var;
    2027                 :             :     }
    2028                 :          25 :   else if (add)
    2029                 :             :     {
    2030                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2031                 :           0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2032                 :           0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2033                 :           0 :                                           build_int_cst (lhs_type, add));
    2034                 :           0 :       gimple_set_location (pattern_stmt, loc);
    2035                 :           0 :       new_var = ret_var;
    2036                 :             :     }
    2037                 :             :   /* If the original is defined at zero but the replacement sequence is
                         :             :      not, or yields a different value there, select VAL explicitly:
                         :             :      result = (A != 0) ? NEW_VAR : VAL, testing the original argument.  */
    2038                 :         124 :   if (defined_at_zero
    2039                 :          88 :       && (!defined_at_zero_new || val != val_cmp))
    2040                 :             :     {
    2041                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2042                 :          11 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2043                 :          11 :       rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2044                 :          11 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2045                 :          11 :       tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2046                 :          11 :       pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
    2047                 :             :                                           build_zero_cst (rhs_type));
    2048                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    2049                 :             :                               truth_type_for (vec_type), rhs_type);
    2050                 :          11 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2051                 :             :                                           new_var,
    2052                 :          11 :                                           build_int_cst (lhs_type, val));
    2053                 :             :     }
    2054                 :             : 
    2055                 :         124 :   if (dump_enabled_p ())
    2056                 :          36 :     dump_printf_loc (MSG_NOTE, vect_location,
    2057                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2058                 :             : 
    2059                 :             :   return pattern_stmt;
    2060                 :             : }
    2061                 :             : 
    2062                 :             : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2063                 :             : 
    2064                 :             :    Try to find the following pattern:
    2065                 :             : 
    2066                 :             :    UTYPE1 A;
    2067                 :             :    TYPE1 B;
    2068                 :             :    UTYPE2 temp_in;
    2069                 :             :    TYPE3 temp_out;
    2070                 :             :    temp_in = (UTYPE2)A;
    2071                 :             : 
    2072                 :             :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2073                 :             :    B = (TYPE1) temp_out;
    2074                 :             : 
    2075                 :             :    UTYPE2 may or may not be equal to TYPE3,
    2076                 :             :    e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
    2077                 :             :    but is not equal to TYPE3 for __builtin_popcountll.
    2078                 :             : 
    2079                 :             :    Input:
    2080                 :             : 
    2081                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2082                 :             :    Here it starts with B = (TYPE1) temp_out.
    2083                 :             : 
    2084                 :             :    Output:
    2085                 :             : 
    2086                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2087                 :             : 
    2088                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2089                 :             :    stmts that constitute the pattern. In this case it will be:
    2090                 :             :    B = .POPCOUNT (A);
    2091                 :             : 
    2092                 :             :    Similarly for clz, ctz and ffs.
    2093                 :             : */
    2094                 :             : 
    2095                 :             : static gimple *
    2096                 :    29775953 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2097                 :             :                                          stmt_vec_info stmt_vinfo,
    2098                 :             :                                          tree *type_out)
    2099                 :             : {
    2100                 :    29775953 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2101                 :    20346686 :   gimple *call_stmt, *pattern_stmt;
    2102                 :    20346686 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2103                 :    50122480 :   internal_fn ifn = IFN_LAST;
    2104                 :    29775794 :   int addend = 0;
    2105                 :             : 
    2106                 :             :   /* Find B = (TYPE1) temp_out. */
    2107                 :    20346686 :   if (!last_stmt)
    2108                 :             :     return NULL;
    2109                 :    20346686 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2110                 :    20346686 :   if (!CONVERT_EXPR_CODE_P (code))
    2111                 :             :     return NULL;
    2112                 :             : 
    2113                 :     2780360 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2114                 :     2780360 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2115                 :     2780360 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2116                 :             :     return NULL;
    2117                 :             : 
    2118                 :     2625509 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2119                 :     2625509 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2120                 :     2625509 :       || !has_single_use (rhs_oprnd))
    2121                 :             :     return NULL;
    2122                 :     1369222 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2123                 :             : 
    2124                 :             :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
    2125                 :     1369222 :   if (!is_gimple_call (call_stmt))
    2126                 :             :     return NULL;
    2127                 :      100760 :   switch (gimple_call_combined_fn (call_stmt))
    2128                 :             :     {
    2129                 :             :       int val;
    2130                 :             :     CASE_CFN_POPCOUNT:
    2131                 :             :       ifn = IFN_POPCOUNT;
    2132                 :             :       break;
    2133                 :        3348 :     CASE_CFN_CLZ:
    2134                 :        3348 :       ifn = IFN_CLZ;
    2135                 :             :       /* Punt if call result is unsigned and defined value at zero
    2136                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2137                 :        3348 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2138                 :           0 :           && gimple_call_internal_p (call_stmt)
    2139                 :        3348 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2140                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2141                 :        3348 :           && val < 0)
    2142                 :             :         return NULL;
    2143                 :             :       break;
    2144                 :         561 :     CASE_CFN_CTZ:
    2145                 :         561 :       ifn = IFN_CTZ;
    2146                 :             :       /* Punt if call result is unsigned and defined value at zero
    2147                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2148                 :         561 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2149                 :           0 :           && gimple_call_internal_p (call_stmt)
    2150                 :         561 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2151                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2152                 :         561 :           && val < 0)
    2153                 :             :         return NULL;
    2154                 :             :       break;
    2155                 :          18 :     CASE_CFN_FFS:
    2156                 :          18 :       ifn = IFN_FFS;
    2157                 :          18 :       break;
    2158                 :             :     default:
    2159                 :             :       return NULL;
    2160                 :             :     }
    2161                 :             : 
    2162                 :        4165 :   if (gimple_call_num_args (call_stmt) != 1
    2163                 :        4165 :       && gimple_call_num_args (call_stmt) != 2)
    2164                 :             :     return NULL;
    2165                 :             : 
    2166                 :        4165 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2167                 :        4165 :   vect_unpromoted_value unprom_diff;
    2168                 :        4165 :   rhs_origin
    2169                 :        4165 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2170                 :             : 
    2171                 :        4165 :   if (!rhs_origin)
    2172                 :             :     return NULL;
    2173                 :             : 
    2174                 :             :   /* Input and output of .POPCOUNT should be same-precision integer.  */
    2175                 :        4165 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2176                 :             :     return NULL;
    2177                 :             : 
    2178                 :             :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2179                 :             :      different builtins/internal functions have different behaviors.  */
    2180                 :        1640 :   if (TYPE_PRECISION (unprom_diff.type)
    2181                 :        1640 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2182                 :         158 :     switch (ifn)
    2183                 :             :       {
    2184                 :          79 :       case IFN_POPCOUNT:
    2185                 :             :         /* For popcount require zero extension, which doesn't add any
    2186                 :             :            further bits to the count.  */
    2187                 :          79 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2188                 :             :           return NULL;
    2189                 :             :         break;
    2190                 :          61 :       case IFN_CLZ:
    2191                 :             :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2192                 :             :            if it is undefined at zero or if it matches also for the
    2193                 :             :            defined value there.  */
    2194                 :          61 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2195                 :             :           return NULL;
    2196                 :          61 :         if (!type_has_mode_precision_p (lhs_type)
    2197                 :          61 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2198                 :           0 :           return NULL;
    2199                 :          61 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2200                 :          61 :                   - TYPE_PRECISION (lhs_type));
    2201                 :          61 :         if (gimple_call_internal_p (call_stmt)
    2202                 :          61 :             && gimple_call_num_args (call_stmt) == 2)
    2203                 :             :           {
    2204                 :           0 :             int val1, val2;
    2205                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2206                 :           0 :             int d2
    2207                 :           0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2208                 :             :                                            val2);
    2209                 :           0 :             if (d2 != 2 || val1 != val2 + addend)
    2210                 :             :               return NULL;
    2211                 :             :           }
    2212                 :             :         break;
    2213                 :          13 :       case IFN_CTZ:
    2214                 :             :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2215                 :             :            if it is undefined at zero or if it matches also for the
    2216                 :             :            defined value there.  */
    2217                 :          13 :         if (gimple_call_internal_p (call_stmt)
    2218                 :          13 :             && gimple_call_num_args (call_stmt) == 2)
    2219                 :             :           {
    2220                 :           0 :             int val1, val2;
    2221                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2222                 :           0 :             int d2
    2223                 :           0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2224                 :             :                                            val2);
    2225                 :           0 :             if (d2 != 2 || val1 != val2)
    2226                 :             :               return NULL;
    2227                 :             :           }
    2228                 :             :         break;
    2229                 :             :       case IFN_FFS:
    2230                 :             :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2231                 :             :         break;
    2232                 :           0 :       default:
    2233                 :           0 :         gcc_unreachable ();
    2234                 :             :       }
    2235                 :             : 
    2236                 :        1640 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2237                 :             :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2238                 :        1640 :   if (!vec_type)
    2239                 :             :     return NULL;
    2240                 :             : 
    2241                 :        1517 :   bool supported
    2242                 :        1517 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2243                 :        1517 :   if (!supported)
    2244                 :        1414 :     switch (ifn)
    2245                 :             :       {
    2246                 :             :       case IFN_POPCOUNT:
    2247                 :             :       case IFN_CLZ:
    2248                 :             :         return NULL;
    2249                 :          18 :       case IFN_FFS:
    2250                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2251                 :          18 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2252                 :             :                                             OPTIMIZE_FOR_SPEED))
    2253                 :             :           break;
    2254                 :             :         /* FALLTHRU */
    2255                 :         359 :       case IFN_CTZ:
    2256                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2257                 :             :            clz or popcount.  */
    2258                 :         359 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2259                 :             :                                             OPTIMIZE_FOR_SPEED))
    2260                 :             :           break;
    2261                 :         325 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2262                 :             :                                             OPTIMIZE_FOR_SPEED))
    2263                 :             :           break;
    2264                 :             :         return NULL;
    2265                 :           0 :       default:
    2266                 :           0 :         gcc_unreachable ();
    2267                 :             :       }
    2268                 :             : 
    2269                 :         159 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2270                 :             :                          call_stmt);
    2271                 :             : 
    2272                 :             :   /* Create B = .POPCOUNT (A).  */
    2273                 :         159 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2274                 :         159 :   tree arg2 = NULL_TREE;
    2275                 :         159 :   int val;
    2276                 :         159 :   if (ifn == IFN_CLZ
    2277                 :         191 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2278                 :             :                                     val) == 2)
    2279                 :          30 :     arg2 = build_int_cst (integer_type_node, val);
    2280                 :         129 :   else if (ifn == IFN_CTZ
    2281                 :         167 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2282                 :             :                                          val) == 2)
    2283                 :          38 :     arg2 = build_int_cst (integer_type_node, val);
    2284                 :         159 :   if (arg2)
    2285                 :          68 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2286                 :             :   else
    2287                 :          91 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2288                 :         159 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2289                 :         159 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2290                 :         159 :   *type_out = vec_type;
    2291                 :             : 
    2292                 :         159 :   if (dump_enabled_p ())
    2293                 :          24 :     dump_printf_loc (MSG_NOTE, vect_location,
    2294                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2295                 :             : 
    2296                 :         159 :   if (addend)
    2297                 :             :     {
    2298                 :           6 :       gcc_assert (supported);
    2299                 :           6 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2300                 :           6 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2301                 :           6 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2302                 :           6 :                                           build_int_cst (lhs_type, addend));
    2303                 :             :     }
    2304                 :         153 :   else if (!supported)
    2305                 :             :     {
    2306                 :          56 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2307                 :          56 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2308                 :          56 :       pattern_stmt
    2309                 :          56 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2310                 :          56 :       if (pattern_stmt == NULL)
    2311                 :             :         return NULL;
    2312                 :          56 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2313                 :             :         {
    2314                 :          56 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2315                 :          56 :           gimple_seq_add_seq_without_update (pseq, seq);
    2316                 :             :         }
    2317                 :             :     }
    2318                 :             :   return pattern_stmt;
    2319                 :             : }
    2320                 :             : 
    2321                 :             : /* Function vect_recog_pow_pattern
    2322                 :             : 
    2323                 :             :    Try to find the following pattern:
    2324                 :             : 
    2325                 :             :      x = POW (y, N);
    2326                 :             : 
    2327                 :             :    with POW being one of pow, powf, powi, powif and N being
    2328                 :             :    either 2 or 0.5.
    2329                 :             : 
    2330                 :             :    Input:
    2331                 :             : 
    2332                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2333                 :             : 
    2334                 :             :    Output:
    2335                 :             : 
    2336                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2337                 :             : 
    2338                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2339                 :             :    stmts that constitute the pattern. In this case it will be:
    2340                 :             :         x = y * y
    2341                 :             :    or
    2342                 :             :         x = sqrt (y)
    2343                 :             : */
    2344                 :             : 
    2345                 :             : static gimple *
    2346                 :    29775962 : vect_recog_pow_pattern (vec_info *vinfo,
    2347                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2348                 :             : {
    2349                 :    29775962 :   gimple *last_stmt = stmt_vinfo->stmt;
    2350                 :    29775962 :   tree base, exp;
    2351                 :    29775962 :   gimple *stmt;
    2352                 :    29775962 :   tree var;
    2353                 :             : 
    2354                 :    29775962 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2355                 :             :     return NULL;
    2356                 :             : 
    2357                 :     1480971 :   switch (gimple_call_combined_fn (last_stmt))
    2358                 :             :     {
    2359                 :         248 :     CASE_CFN_POW:
    2360                 :         248 :     CASE_CFN_POWI:
    2361                 :         248 :       break;
    2362                 :             : 
    2363                 :             :     default:
    2364                 :             :       return NULL;
    2365                 :             :     }
    2366                 :             : 
    2367                 :         248 :   base = gimple_call_arg (last_stmt, 0);
    2368                 :         248 :   exp = gimple_call_arg (last_stmt, 1);
    2369                 :         248 :   if (TREE_CODE (exp) != REAL_CST
    2370                 :         226 :       && TREE_CODE (exp) != INTEGER_CST)
    2371                 :             :     {
    2372                 :         226 :       if (flag_unsafe_math_optimizations
    2373                 :          26 :           && TREE_CODE (base) == REAL_CST
    2374                 :         228 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2375                 :             :         {
    2376                 :           2 :           combined_fn log_cfn;
    2377                 :           2 :           built_in_function exp_bfn;
    2378                 :           2 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2379                 :             :             {
    2380                 :             :             case BUILT_IN_POW:
    2381                 :             :               log_cfn = CFN_BUILT_IN_LOG;
    2382                 :             :               exp_bfn = BUILT_IN_EXP;
    2383                 :             :               break;
    2384                 :           0 :             case BUILT_IN_POWF:
    2385                 :           0 :               log_cfn = CFN_BUILT_IN_LOGF;
    2386                 :           0 :               exp_bfn = BUILT_IN_EXPF;
    2387                 :           0 :               break;
    2388                 :           0 :             case BUILT_IN_POWL:
    2389                 :           0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2390                 :           0 :               exp_bfn = BUILT_IN_EXPL;
    2391                 :           0 :               break;
    2392                 :             :             default:
    2393                 :             :               return NULL;
    2394                 :             :             }
    2395                 :           2 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2396                 :           2 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2397                 :             :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2398                 :             :              does that, but if C is a power of 2, we want to use
    2399                 :             :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2400                 :             :              vectorization we don't have vectorized exp2.  */
    2401                 :           2 :           if (logc
    2402                 :           2 :               && TREE_CODE (logc) == REAL_CST
    2403                 :           2 :               && exp_decl
    2404                 :           4 :               && lookup_attribute ("omp declare simd",
    2405                 :           2 :                                    DECL_ATTRIBUTES (exp_decl)))
    2406                 :             :             {
    2407                 :           2 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2408                 :           2 :               if (node->simd_clones == NULL)
    2409                 :             :                 {
    2410                 :           2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2411                 :           2 :                       || node->definition)
    2412                 :             :                     return NULL;
    2413                 :           2 :                   expand_simd_clones (node);
    2414                 :           2 :                   if (node->simd_clones == NULL)
    2415                 :             :                     return NULL;
    2416                 :             :                 }
    2417                 :           2 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2418                 :           2 :               if (!*type_out)
    2419                 :             :                 return NULL;
    2420                 :           2 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2421                 :           2 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
    2422                 :           2 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2423                 :           2 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2424                 :           2 :               g = gimple_build_call (exp_decl, 1, def);
    2425                 :           2 :               gimple_call_set_lhs (g, res);
    2426                 :           2 :               return g;
    2427                 :             :             }
    2428                 :             :         }
    2429                 :             : 
    2430                 :         224 :       return NULL;
    2431                 :             :     }
    2432                 :             : 
    2433                 :             :   /* We now have a pow or powi builtin function call with a constant
    2434                 :             :      exponent.  */
    2435                 :             : 
    2436                 :             :   /* Catch squaring.  */
    2437                 :          22 :   if ((tree_fits_shwi_p (exp)
    2438                 :           0 :        && tree_to_shwi (exp) == 2)
    2439                 :          22 :       || (TREE_CODE (exp) == REAL_CST
    2440                 :          22 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2441                 :             :     {
    2442                 :           0 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2443                 :           0 :                                             TREE_TYPE (base), type_out))
    2444                 :             :         return NULL;
    2445                 :             : 
    2446                 :           0 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2447                 :           0 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2448                 :           0 :       return stmt;
    2449                 :             :     }
    2450                 :             : 
    2451                 :             :   /* Catch square root.  */
    2452                 :          22 :   if (TREE_CODE (exp) == REAL_CST
    2453                 :          22 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2454                 :             :     {
    2455                 :          11 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2456                 :          11 :       if (*type_out
    2457                 :          11 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2458                 :             :                                              OPTIMIZE_FOR_SPEED))
    2459                 :             :         {
    2460                 :           9 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2461                 :           9 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2462                 :           9 :           gimple_call_set_lhs (stmt, var);
    2463                 :           9 :           gimple_call_set_nothrow (stmt, true);
    2464                 :           9 :           return stmt;
    2465                 :             :         }
    2466                 :             :     }
    2467                 :             : 
    2468                 :             :   return NULL;
    2469                 :             : }
    2470                 :             : 
    2471                 :             : 
    2472                 :             : /* Function vect_recog_widen_sum_pattern
    2473                 :             : 
    2474                 :             :    Try to find the following pattern:
    2475                 :             : 
    2476                 :             :      type x_t;
    2477                 :             :      TYPE x_T, sum = init;
    2478                 :             :    loop:
    2479                 :             :      sum_0 = phi <init, sum_1>
    2480                 :             :      S1  x_t = *p;
    2481                 :             :      S2  x_T = (TYPE) x_t;
    2482                 :             :      S3  sum_1 = x_T + sum_0;
    2483                 :             : 
    2484                 :             :    where type 'TYPE' is at least double the size of type 'type', i.e. we're
    2485                 :             :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2486                 :             :    a special case of a reduction computation.
    2487                 :             : 
    2488                 :             :    Input:
    2489                 :             : 
    2490                 :             :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2491                 :             :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2492                 :             : 
    2493                 :             :    Output:
    2494                 :             : 
    2495                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2496                 :             : 
    2497                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2498                 :             :    stmts that constitute the pattern. In this case it will be:
    2499                 :             :         WIDEN_SUM <x_t, sum_0>
    2500                 :             : 
    2501                 :             :    Note: The widening-sum idiom is a widening reduction pattern that is
    2502                 :             :          vectorized without preserving all the intermediate results. It
    2503                 :             :          produces only N/2 (widened) results (by summing up pairs of
    2504                 :             :          intermediate results) rather than all N results.  Therefore, we
    2505                 :             :          cannot allow this pattern when we want to get all the results and in
    2506                 :             :          the correct order (as is the case when this computation is in an
    2507                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    2508                 :             : 
    2509                 :             : static gimple *
    2510                 :    29775962 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2511                 :             :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2512                 :             : {
    2513                 :    29775962 :   gimple *last_stmt = stmt_vinfo->stmt;
    2514                 :    29775962 :   tree oprnd0, oprnd1;
    2515                 :    29775962 :   tree type;
    2516                 :    29775962 :   gimple *pattern_stmt;
    2517                 :    29775962 :   tree var;
    2518                 :             : 
    2519                 :             :   /* Look for the following pattern
    2520                 :             :           DX = (TYPE) X;
    2521                 :             :           sum_1 = DX + sum_0;
    2522                 :             :      In which DX is at least double the size of X, and sum_1 has been
    2523                 :             :      recognized as a reduction variable.
    2524                 :             :    */
    2525                 :             : 
    2526                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2527                 :             :      of the above pattern.  */
    2528                 :             : 
    2529                 :    29775962 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2530                 :             :                                        &oprnd0, &oprnd1)
    2531                 :       33055 :       || TREE_CODE (oprnd0) != SSA_NAME
    2532                 :    29808835 :       || !vinfo->lookup_def (oprnd0))
    2533                 :    29743161 :     return NULL;
    2534                 :             :   /* TYPE is the type of the value that LAST_STMT computes.  */
    2535                 :       32801 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2536                 :             : 
    2537                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2538                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2539                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2540                 :             :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    2541                 :             :      'TYPE'.  */
    2542                 :             :   /* TYPE must be at least twice as precise as the unpromoted input.  */
    2543                 :       32801 :   vect_unpromoted_value unprom0;
    2544                 :       32801 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2545                 :       32801 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    2546                 :             :     return NULL;
    2547                 :             : 
    2548                 :        1760 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2549                 :             :   /* Punt if vect_supportable_direct_optab_p rejects WIDEN_SUM_EXPR.  */
    2550                 :        1760 :   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2551                 :             :                                         unprom0.type, type_out))
    2552                 :             :     return NULL;
    2553                 :             :   /* Build VAR = WIDEN_SUM_EXPR <unprom0.op, oprnd1> as the pattern.  */
    2554                 :           0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2555                 :           0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    2556                 :             : 
    2557                 :           0 :   return pattern_stmt;
    2558                 :             : }
    2559                 :             : 
    2560                 :             : /* Function vect_recog_bitfield_ref_pattern
    2561                 :             : 
    2562                 :             :    Try to find the following pattern:
    2563                 :             : 
    2564                 :             :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2565                 :             :    result = (type_out) bf_value;
    2566                 :             : 
    2567                 :             :    or
    2568                 :             : 
    2569                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2570                 :             : 
    2571                 :             :    where type_out is a non-bitfield type, that is to say, its precision matches
    2572                 :             :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2573                 :             : 
    2574                 :             :    Input:
    2575                 :             : 
    2576                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2577                 :             :    here it starts with:
    2578                 :             :    result = (type_out) bf_value;
    2579                 :             : 
    2580                 :             :    or
    2581                 :             : 
    2582                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2583                 :             : 
    2584                 :             :    Output:
    2585                 :             : 
    2586                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2587                 :             : 
    2588                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2589                 :             :    stmts that constitute the pattern. If the precision of type_out is bigger
    2590                 :             :    than the precision type of _1 we perform the widening before the shifting,
    2591                 :             :    since the new precision will be large enough to shift the value and moving
    2592                 :             :    widening operations up the statement chain enables the generation of
    2593                 :             :    widening loads.  If we are widening and the operation after the pattern is
    2594                 :             :    an addition then we mask first and shift later, to enable the generation of
    2595                 :             :    shifting adds.  In the case of narrowing we will always mask first, shift
    2596                 :             :    last and then perform a narrowing operation.  This will enable the
    2597                 :             :    generation of narrowing shifts.
    2598                 :             : 
    2599                 :             :    Widening with mask first, shift later:
    2600                 :             :    container = (type_out) container;
    2601                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2602                 :             :    result = masked >> bitpos;
    2603                 :             : 
    2604                 :             :    Widening with shift first, mask last:
    2605                 :             :    container = (type_out) container;
    2606                 :             :    shifted = container >> bitpos;
    2607                 :             :    result = shifted & ((1 << bitsize) - 1);
    2608                 :             : 
    2609                 :             :    Narrowing:
    2610                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2611                 :             :    result = masked >> bitpos;
    2612                 :             :    result = (type_out) result;
    2613                 :             : 
    2614                 :             :    If the bitfield is signed and type_out is wider than the bitfield, we
    2615                 :             :    need to keep the result sign-extended:
    2616                 :             :    container = (type) container;
    2617                 :             :    masked = container << (prec - bitsize - bitpos);
    2618                 :             :    result = (type_out) (masked >> (prec - bitsize));
    2619                 :             : 
    2620                 :             :    Here type is the signed variant of the wider of type_out and the type
    2621                 :             :    of container.
    2622                 :             : 
    2623                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2624                 :             : 
    2625                 :             :    When the original bitfield was inside a gcond then a new gcond is also
    2626                 :             :    generated with the new `result` as the operand to the comparison.
    2627                 :             : 
    2628                 :             : */
    2629                 :             : 
    2630                 :             : static gimple *
    2631                 :    29715804 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2632                 :             :                                  tree *type_out)
    2633                 :             : {
    2634                 :    29715804 :   gimple *bf_stmt = NULL;
    2635                 :    29715804 :   tree lhs = NULL_TREE;
    2636                 :    29715804 :   tree ret_type = NULL_TREE;
    2637                 :    29715804 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2638                 :    29715804 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2639                 :             :     {
    2640                 :     4954678 :       tree op = gimple_cond_lhs (cond_stmt);
    2641                 :     4954678 :       if (TREE_CODE (op) != SSA_NAME)
    2642                 :             :         return NULL;
    2643                 :     4954448 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2644                 :     4954448 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2645                 :             :         return NULL;
    2646                 :             :     }
    2647                 :    24761126 :   else if (is_gimple_assign (stmt)
    2648                 :    20286152 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2649                 :    27461654 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2650                 :             :     {
    2651                 :     2656293 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2652                 :     2656293 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2653                 :     2656293 :       lhs = gimple_assign_lhs (stmt);
    2654                 :     2656293 :       ret_type = TREE_TYPE (lhs);
    2655                 :             :     }
    2656                 :             :   /* Only an assignment whose rhs code is BIT_FIELD_REF is handled.  */
    2657                 :     5930095 :   if (!bf_stmt
    2658                 :     5930095 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2659                 :             :     return NULL;
    2660                 :             : 
    2661                 :       14118 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2662                 :       14118 :   tree container = TREE_OPERAND (bf_ref, 0);
    2663                 :       14118 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);
    2664                 :             :   /* Offset and size must be constant; the container size must fit a uhwi.  */
    2665                 :       14118 :   if (!bit_field_offset (bf_ref).is_constant ()
    2666                 :       14118 :       || !bit_field_size (bf_ref).is_constant ()
    2667                 :       14118 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2668                 :             :     return NULL;
    2669                 :             :   /* Need an integral reference and an integral, non-BLKmode container.  */
    2670                 :       27858 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2671                 :       14113 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2672                 :       16116 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2673                 :       12120 :     return NULL;
    2674                 :             : 
    2675                 :        1998 :   gimple *use_stmt, *pattern_stmt;
    2676                 :        1998 :   use_operand_p use_p;
    2677                 :        1998 :   bool shift_first = true;
    2678                 :        1998 :   tree container_type = TREE_TYPE (container);
    2679                 :        1998 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2680                 :             : 
    2681                 :             :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2682                 :             :      the container may change and we have to consider offset change for
    2683                 :             :      widening loads on big endianness.  The shift_n calculated here can be
    2684                 :             :      independent of widening.  */
    2685                 :        1998 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2686                 :        1998 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2687                 :        1998 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2688                 :        1998 :   if (BYTES_BIG_ENDIAN)
    2689                 :             :     shift_n = prec - shift_n - mask_width;
    2690                 :             :   /* REF_SEXT: signed field below RET_TYPE precision.  LOAD_WIDEN: container below it.  */
    2691                 :        1998 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2692                 :        1353 :                    TYPE_PRECISION (ret_type) > mask_width);
    2693                 :        1998 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2694                 :        1998 :                      TYPE_PRECISION (ret_type));
    2695                 :             : 
    2696                 :             :   /* We move the conversion earlier if the loaded type is smaller than the
    2697                 :             :      return type to enable the use of widening loads.  And if we need a
    2698                 :             :      sign extension, we need to convert the loaded value early to a signed
    2699                 :             :      type as well.  */
    2700                 :        1998 :   if (ref_sext || load_widen)
    2701                 :             :     {
    2702                 :         936 :       tree type = load_widen ? ret_type : container_type;
    2703                 :         936 :       if (ref_sext)
    2704                 :         891 :         type = gimple_signed_type (type);
    2705                 :         936 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2706                 :             :                                           NOP_EXPR, container);
    2707                 :         936 :       container = gimple_get_lhs (pattern_stmt);
    2708                 :         936 :       container_type = TREE_TYPE (container);
    2709                 :         936 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2710                 :         936 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2711                 :         936 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2712                 :             :     }
    2713                 :        1062 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2714                 :             :     /* If we are doing the conversion last then also delay the shift as we may
    2715                 :             :        be able to combine the shift and conversion in certain cases.  */
    2716                 :             :     shift_first = false;
    2717                 :             : 
    2718                 :             :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2719                 :             :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2720                 :             :      add into a single instruction.  */
    2721                 :        1295 :   if (lhs && !is_pattern_stmt_p (stmt_info)
    2722                 :        3293 :       && single_imm_use (lhs, &use_p, &use_stmt))
    2723                 :             :     {
    2724                 :         940 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2725                 :         940 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2726                 :             :         shift_first = false;
    2727                 :             :     }
    2728                 :             : 
    2729                 :             :   /* If we don't have to shift we only generate the mask, so just fix the
    2730                 :             :      code-path to shift_first.  */
    2731                 :        1998 :   if (shift_n == 0)
    2732                 :         707 :     shift_first = true;
    2733                 :             :   /* Shift-first, no sign extension: (CONTAINER >> SHIFT_N) & MASK.  */
    2734                 :        1998 :   tree result;
    2735                 :        1998 :   if (shift_first && !ref_sext)
    2736                 :             :     {
    2737                 :         446 :       tree shifted = container;
    2738                 :         446 :       if (shift_n)
    2739                 :             :         {
    2740                 :          53 :           pattern_stmt
    2741                 :          53 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2742                 :             :                                    RSHIFT_EXPR, container,
    2743                 :          53 :                                    build_int_cst (sizetype, shift_n));
    2744                 :          53 :           shifted = gimple_assign_lhs (pattern_stmt);
    2745                 :          53 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2746                 :             :         }
    2747                 :             : 
    2748                 :         446 :       tree mask = wide_int_to_tree (container_type,
    2749                 :         446 :                                     wi::mask (mask_width, false, prec));
    2750                 :             : 
    2751                 :         446 :       pattern_stmt
    2752                 :         446 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2753                 :             :                                BIT_AND_EXPR, shifted, mask);
    2754                 :         446 :       result = gimple_assign_lhs (pattern_stmt);
    2755                 :             :     }
    2756                 :             :   else
    2757                 :             :     {
    2758                 :        1552 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2759                 :        1552 :       if (!ref_sext)
    2760                 :             :         {
    2761                 :         661 :           tree mask = wide_int_to_tree (container_type,
    2762                 :         661 :                                         wi::shifted_mask (shift_n,
    2763                 :             :                                                           mask_width,
    2764                 :             :                                                           false, prec));
    2765                 :         661 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2766                 :             :                                               container, mask);
    2767                 :             :         }
    2768                 :             :       else
    2769                 :             :         {
    2770                 :         891 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;
    2771                 :         891 :           shift_n += shl;
    2772                 :         891 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2773                 :             :                                               container,
    2774                 :             :                                               build_int_cst (sizetype,
    2775                 :         891 :                                                              shl));
    2776                 :             :         }
    2777                 :             :       /* SHIFT_N is PREC - MASK_WIDTH here when REF_SEXT.  */
    2778                 :        1552 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2779                 :        1552 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2780                 :        1552 :       pattern_stmt
    2781                 :        1552 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2782                 :             :                                RSHIFT_EXPR, masked,
    2783                 :        1552 :                                build_int_cst (sizetype, shift_n));
    2784                 :        1552 :       result = gimple_assign_lhs (pattern_stmt);
    2785                 :             :     }
    2786                 :             :   /* Convert RESULT to RET_TYPE unless that conversion is useless.  */
    2787                 :        1998 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2788                 :             :     {
    2789                 :        1299 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2790                 :        1299 :       pattern_stmt
    2791                 :        1299 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2792                 :             :                                NOP_EXPR, result);
    2793                 :             :     }
    2794                 :             :   /* LHS is only unset on the gcond path; rebuild the comparison there.  */
    2795                 :        1998 :   if (!lhs)
    2796                 :             :     {
    2797                 :         703 :       if (!vectype)
    2798                 :             :         return NULL;
    2799                 :             : 
    2800                 :         595 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2801                 :         595 :       vectype = truth_type_for (vectype);
    2802                 :             : 
    2803                 :             :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2804                 :             :                 same way as vect_recog_gcond_pattern does.  However because
    2805                 :             :                 patterns cannot match the same root twice,  when we handle and
    2806                 :             :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2807                 :             :                 apply anymore.  We should really fix it so that we don't need to
    2808                 :             :                 duplicate transformations like these.  */
    2809                 :         595 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2810                 :         595 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2811                 :         595 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2812                 :         595 :       gimple *new_stmt
    2813                 :         595 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2814                 :             :                                gimple_get_lhs (pattern_stmt),
    2815                 :             :                                fold_convert (container_type, cond_cst));
    2816                 :         595 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2817                 :         595 :       pattern_stmt
    2818                 :         595 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2819                 :         595 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2820                 :             :                              NULL_TREE, NULL_TREE);
    2821                 :             :     }
    2822                 :             : 
    2823                 :        1890 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2824                 :        1890 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2825                 :             : 
    2826                 :        1890 :   return pattern_stmt;
    2827                 :             : }
    2828                 :             : 
    2829                 :             : /* Function vect_recog_bit_insert_pattern
    2830                 :             : 
    2831                 :             :    Try to find the following pattern:
    2832                 :             : 
    2833                 :             :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2834                 :             : 
    2835                 :             :    Input:
    2836                 :             : 
    2837                 :             :    * STMT_VINFO: The stmt we want to replace.
    2838                 :             : 
    2839                 :             :    Output:
    2840                 :             : 
    2841                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2842                 :             : 
    2843                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2844                 :             :    stmts that constitute the pattern. In this case it will be:
    2845                 :             :    value = (container_type) value;          // Make sure value has container_type.
    2846                 :             :    shifted = value << bitpos;                 // Shift value into place
    2847                 :             :    masked = shifted & (mask << bitpos);           // Mask off the non-relevant bits in
    2848                 :             :                                             // the 'to-write value'.
    2849                 :             :    cleared = container & ~(mask << bitpos); // Clearing the bits we want to
    2850                 :             :                                             // write to from the value we want
    2851                 :             :                                             // to write to.
    2852                 :             :    written = cleared | masked;              // Write bits.
    2853                 :             : 
    2854                 :             : 
    2855                 :             :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2856                 :             :    bits corresponding to the real size of the bitfield value we are writing to.
    2857                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2858                 :             : 
    2859                 :             : */
    2860                 :             : 
    2861                 :             : static gimple *
    2862                 :    29718735 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2863                 :             :                                tree *type_out)
    2864                 :             : {
    2865                 :    29718735 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2866                 :    27317632 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2867                 :             :     return NULL;
    2868                 :             : 
    2869                 :         610 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2870                 :         610 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2871                 :         610 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2872                 :             : 
    2873                 :         610 :   tree bf_type = TREE_TYPE (value);
    2874                 :         610 :   tree container_type = TREE_TYPE (container);
    2875                 :             : 
    2876                 :         610 :   if (!INTEGRAL_TYPE_P (container_type)
    2877                 :         610 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2878                 :             :     return NULL;
    2879                 :             : 
    2880                 :         506 :   gimple *pattern_stmt;
    2881                 :             : 
    2882                 :         506 :   vect_unpromoted_value unprom;
    2883                 :         506 :   unprom.set_op (value, vect_internal_def);
    2884                 :         506 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2885                 :             :                               get_vectype_for_scalar_type (vinfo,
    2886                 :             :                                                            container_type));
    2887                 :             : 
    2888                 :         506 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
    2889                 :         506 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2890                 :         506 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2891                 :         506 :   if (BYTES_BIG_ENDIAN)
    2892                 :             :     {
    2893                 :             :       shift_n = prec - shift_n - mask_width;
    2894                 :             :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2895                 :             :     }
    2896                 :             : 
    2897                 :         506 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2898                 :             :     {
    2899                 :           0 :       pattern_stmt =
    2900                 :           0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2901                 :             :                              NOP_EXPR, value);
    2902                 :           0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2903                 :           0 :       value = gimple_get_lhs (pattern_stmt);
    2904                 :             :     }
    2905                 :             : 
    2906                 :             :   /* Shift VALUE into place.  */
    2907                 :         506 :   tree shifted = value;
    2908                 :         506 :   if (shift_n)
    2909                 :             :     {
    2910                 :         261 :       gimple_seq stmts = NULL;
    2911                 :         261 :       shifted
    2912                 :         261 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2913                 :         261 :       if (!gimple_seq_empty_p (stmts))
    2914                 :         112 :         append_pattern_def_seq (vinfo, stmt_info,
    2915                 :             :                                 gimple_seq_first_stmt (stmts));
    2916                 :             :     }
    2917                 :             : 
    2918                 :         506 :   tree mask_t
    2919                 :         506 :     = wide_int_to_tree (container_type,
    2920                 :         506 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2921                 :             : 
    2922                 :             :   /* Clear bits we don't want to write back from SHIFTED.  */
    2923                 :         506 :   gimple_seq stmts = NULL;
    2924                 :         506 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2925                 :             :                               mask_t);
    2926                 :         506 :   if (!gimple_seq_empty_p (stmts))
    2927                 :             :     {
    2928                 :         110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2929                 :         110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2930                 :             :     }
    2931                 :             : 
    2932                 :             :   /* Mask off the bits in the container that we are to write to.  */
    2933                 :         506 :   mask_t = wide_int_to_tree (container_type,
    2934                 :         506 :                              wi::shifted_mask (shift_n, mask_width, true, prec));
    2935                 :         506 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2936                 :         506 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2937                 :         506 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2938                 :             : 
    2939                 :             :   /* Write MASKED into CLEARED.  */
    2940                 :         506 :   pattern_stmt
    2941                 :         506 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2942                 :             :                            BIT_IOR_EXPR, cleared, masked);
    2943                 :             : 
    2944                 :         506 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2945                 :         506 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2946                 :             : 
    2947                 :         506 :   return pattern_stmt;
    2948                 :             : }
    2949                 :             : 
    2950                 :             : 
    2951                 :             : /* Recognize cases in which an operation is performed in one type WTYPE
    2952                 :             :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2953                 :             :    if we have:
    2954                 :             : 
    2955                 :             :      ATYPE a;  // narrower than NTYPE
    2956                 :             :      BTYPE b;  // narrower than NTYPE
    2957                 :             :      WTYPE aw = (WTYPE) a;
    2958                 :             :      WTYPE bw = (WTYPE) b;
    2959                 :             :      WTYPE res = aw + bw;  // only uses of aw and bw
    2960                 :             : 
    2961                 :             :    then it would be more efficient to do:
    2962                 :             : 
    2963                 :             :      NTYPE an = (NTYPE) a;
    2964                 :             :      NTYPE bn = (NTYPE) b;
    2965                 :             :      NTYPE resn = an + bn;
    2966                 :             :      WTYPE res = (WTYPE) resn;
    2967                 :             : 
    2968                 :             :    Other situations include things like:
    2969                 :             : 
    2970                 :             :      ATYPE a;  // NTYPE or narrower
    2971                 :             :      WTYPE aw = (WTYPE) a;
    2972                 :             :      WTYPE res = aw + b;
    2973                 :             : 
    2974                 :             :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2975                 :             :    to truncate "b" and do the operation on NTYPE instead:
    2976                 :             : 
    2977                 :             :      NTYPE an = (NTYPE) a;
    2978                 :             :      NTYPE bn = (NTYPE) b;  // truncation
    2979                 :             :      NTYPE resn = an + bn;
    2980                 :             :      WTYPE res = (WTYPE) resn;
    2981                 :             : 
    2982                 :             :    All users of "res" should then use "resn" instead, making the final
    2983                 :             :    statement dead (not marked as relevant).  The final statement is still
    2984                 :             :    needed to maintain the type correctness of the IR.
    2985                 :             : 
    2986                 :             :    vect_determine_precisions has already determined the minimum
    2987                 :             :    precision of the operation and the minimum precision required
    2988                 :             :    by users of the result.  */
    2989                 :             : 
    2990                 :             : static gimple *
    2991                 :    29719175 : vect_recog_over_widening_pattern (vec_info *vinfo,
    2992                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    2993                 :             : {
    2994                 :    29719175 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2995                 :    20290118 :   if (!last_stmt)
    2996                 :             :     return NULL;
    2997                 :             : 
    2998                 :             :   /* See whether we have found that this operation can be done on a
    2999                 :             :      narrower type without changing its semantics.  */
    3000                 :    20290118 :   unsigned int new_precision = last_stmt_info->operation_precision;
    3001                 :    20290118 :   if (!new_precision)
    3002                 :             :     return NULL;
    3003                 :             : 
    3004                 :     1289664 :   tree lhs = gimple_assign_lhs (last_stmt);
    3005                 :     1289664 :   tree type = TREE_TYPE (lhs);
    3006                 :     1289664 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3007                 :             : 
    3008                 :             :   /* Punt for reductions where we don't handle the type conversions.  */
    3009                 :     1289664 :   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    3010                 :             :     return NULL;
    3011                 :             : 
    3012                 :             :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    3013                 :             :      operands are interesting.  FIRST_OP is the index of the first gimple operand that is examined below.  */
    3014                 :     1285512 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    3015                 :             : 
    3016                 :             :   /* Check the operands.  Each one must be an INTEGER_CST or an SSA_NAME; anything else makes us punt.  */
    3017                 :     1285512 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    3018                 :     1285512 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    3019                 :     1285512 :   unprom.quick_grow_cleared (nops);
    3020                 :     1285512 :   unsigned int min_precision = 0;
    3021                 :     1285512 :   bool single_use_p = false;
    3022                 :     3843613 :   for (unsigned int i = 0; i < nops; ++i)
    3023                 :             :     {
    3024                 :     2558762 :       tree op = gimple_op (last_stmt, first_op + i);
    3025                 :     2558762 :       if (TREE_CODE (op) == INTEGER_CST)
    3026                 :     1141436 :         unprom[i].set_op (op, vect_constant_def);
    3027                 :     1417326 :       else if (TREE_CODE (op) == SSA_NAME)
    3028                 :             :         {
    3029                 :     1417326 :           bool op_single_use_p = true;
    3030                 :     1417326 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3031                 :             :                                                      &op_single_use_p))
    3032                 :         661 :             return NULL;
    3033                 :             :           /* If:
    3034                 :             : 
    3035                 :             :              (1) N bits of the result are needed;
    3036                 :             :              (2) all inputs are widened from M<N bits; and
    3037                 :             :              (3) one operand OP is a single-use SSA name
    3038                 :             : 
    3039                 :             :              we can shift the M->N widening from OP to the output
    3040                 :             :              without changing the number or type of extensions involved.
    3041                 :             :              This then reduces the number of copies of STMT_INFO.
    3042                 :             : 
    3043                 :             :              If instead of (3) more than one operand is a single-use SSA name,
    3044                 :             :              shifting the extension to the output is even more of a win.
    3045                 :             : 
    3046                 :             :              If instead:
    3047                 :             : 
    3048                 :             :              (1) N bits of the result are needed;
    3049                 :             :              (2) one operand OP2 is widened from M2<N bits;
    3050                 :             :              (3) another operand OP1 is widened from M1<M2 bits; and
    3051                 :             :              (4) both OP1 and OP2 are single-use
    3052                 :             : 
    3053                 :             :              the choice is between:
    3054                 :             : 
    3055                 :             :              (a) truncating OP2 to M1, doing the operation on M1,
    3056                 :             :                  and then widening the result to N
    3057                 :             : 
    3058                 :             :              (b) widening OP1 to M2, doing the operation on M2, and then
    3059                 :             :                  widening the result to N
    3060                 :             : 
    3061                 :             :              Both shift the M2->N widening of the inputs to the output.
    3062                 :             :              (a) additionally shifts the M1->M2 widening to the output;
    3063                 :             :              it requires fewer copies of STMT_INFO but requires an extra
    3064                 :             :              M2->M1 truncation.
    3065                 :             : 
    3066                 :             :              Which is better will depend on the complexity and cost of
    3067                 :             :              STMT_INFO, which is hard to predict at this stage.  However,
    3068                 :             :              a clear tie-breaker in favor of (b) is the fact that the
    3069                 :             :              truncation in (a) increases the length of the operation chain.
    3070                 :             : 
    3071                 :             :              If instead of (4) only one of OP1 or OP2 is single-use,
    3072                 :             :              (b) is still a win over doing the operation in N bits:
    3073                 :             :              it still shifts the M2->N widening on the single-use operand
    3074                 :             :              to the output and reduces the number of STMT_INFO copies.
    3075                 :             : 
    3076                 :             :              If neither operand is single-use then operating on fewer than
    3077                 :             :              N bits might lead to more extensions overall.  Whether it does
    3078                 :             :              or not depends on global information about the vectorization
    3079                 :             :              region, and whether that's a good trade-off would again
    3080                 :             :              depend on the complexity and cost of the statements involved,
    3081                 :             :              as well as things like register pressure that are not normally
    3082                 :             :              modelled at this stage.  We therefore ignore these cases
    3083                 :             :              and just optimize the clear single-use wins above.
    3084                 :             : 
    3085                 :             :              Thus we take the maximum precision of the unpromoted operands
    3086                 :             :              and record whether any operand is single-use.  */
    3087                 :     1416665 :           if (unprom[i].dt == vect_internal_def)
    3088                 :             :             {
    3089                 :     1017963 :               min_precision = MAX (min_precision,
    3090                 :             :                                    TYPE_PRECISION (unprom[i].type));
    3091                 :     1017963 :               single_use_p |= op_single_use_p;
    3092                 :             :             }
    3093                 :             :         }
    3094                 :             :       else
    3095                 :             :         return NULL;
    3096                 :             :     }
    3097                 :             : 
    3098                 :             :   /* Although the operation could be done in operation_precision, we have
    3099                 :             :      to balance that against introducing extra truncations or extensions.
    3100                 :             :      Calculate the minimum precision that can be handled efficiently.
    3101                 :             : 
    3102                 :             :      The loop above determined that the operation could be handled
    3103                 :             :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3104                 :             :      extension from the inputs to the output without introducing more
    3105                 :             :      instructions, and would reduce the number of instructions required
    3106                 :             :      for STMT_INFO itself.
    3107                 :             : 
    3108                 :             :      vect_determine_precisions has also determined that the result only
    3109                 :             :      needs min_output_precision bits.  Truncating by a factor of N times
    3110                 :             :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3111                 :             :      than min_output_precision, doing the operation in TYPE and truncating
    3112                 :             :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3113                 :             :      In contrast:
    3114                 :             : 
    3115                 :             :      - truncating the input to a unary operation and doing the operation
    3116                 :             :        in the new type requires at most N - 1 + 1 = N instructions per
    3117                 :             :        output vector
    3118                 :             : 
    3119                 :             :      - doing the same for a binary operation requires at most
    3120                 :             :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3121                 :             : 
    3122                 :             :      Both unary and binary operations require fewer instructions than
    3123                 :             :      this if the operands were extended from a suitable truncated form.
    3124                 :             :      Thus there is usually nothing to lose by doing operations in
    3125                 :             :      min_output_precision bits, but there can be something to gain.  */
    3126                 :     1284851 :   if (!single_use_p)
    3127                 :      936088 :     min_precision = last_stmt_info->min_output_precision;
    3128                 :             :   else
    3129                 :      348763 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3130                 :             : 
    3131                 :             :   /* Apply the minimum efficient precision we just calculated, and punt unless the resulting precision is narrower than TYPE.  */
    3132                 :     1284851 :   if (new_precision < min_precision)
    3133                 :             :     new_precision = min_precision;
    3134                 :     1284851 :   new_precision = vect_element_precision (new_precision);
    3135                 :     1284851 :   if (new_precision >= TYPE_PRECISION (type))
    3136                 :             :     return NULL;
    3137                 :             : 
    3138                 :      159801 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3139                 :             : 
    3140                 :      159801 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3141                 :      159801 :   if (!*type_out)
    3142                 :             :     return NULL;
    3143                 :             : 
    3144                 :             :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3145                 :      142700 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3146                 :      142700 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3147                 :             : 
    3148                 :             :   /* If we're truncating an operation, we need to make sure that we
    3149                 :             :      don't introduce new undefined overflow.  The codes tested here are
    3150                 :             :      a subset of those accepted by vect_truncatable_operation_p.  */
    3151                 :      142700 :   tree op_type = new_type;
    3152                 :      142700 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3153                 :      180035 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3154                 :       25731 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3155                 :             : 
    3156                 :             :   /* We specifically don't check here whether the target supports the
    3157                 :             :      new operation, since it might be something that a later pattern
    3158                 :             :      wants to rewrite anyway.  If targets have a minimum element size
    3159                 :             :      for some optabs, we should pattern-match smaller ops to larger ops
    3160                 :             :      where beneficial.  */
    3161                 :      142700 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3162                 :      142700 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3163                 :      142700 :   if (!new_vectype || !op_vectype)
    3164                 :             :     return NULL;
    3165                 :             : 
    3166                 :      142700 :   if (dump_enabled_p ())
    3167                 :        4586 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3168                 :             :                      type, new_type);
    3169                 :             : 
    3170                 :             :   /* Calculate the rhs operands for an operation on OP_TYPE.  Operands before FIRST_OP (the condition of a COND_EXPR) are copied over as-is.  */
    3171                 :      142700 :   tree ops[3] = {};
    3172                 :      142910 :   for (unsigned int i = 1; i < first_op; ++i)
    3173                 :         210 :     ops[i - 1] = gimple_op (last_stmt, i);
    3174                 :      142700 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3175                 :      142700 :                        op_type, &unprom[0], op_vectype);
    3176                 :             : 
    3177                 :             :   /* Use the operation to produce a result of type OP_TYPE.  */
    3178                 :      142700 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3179                 :      142700 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3180                 :             :                                               ops[0], ops[1], ops[2]);
    3181                 :      142700 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3182                 :             : 
    3183                 :      142700 :   if (dump_enabled_p ())
    3184                 :        4586 :     dump_printf_loc (MSG_NOTE, vect_location,
    3185                 :             :                      "created pattern stmt: %G", pattern_stmt);
    3186                 :             : 
    3187                 :             :   /* Convert back to the original signedness, if OP_TYPE is different
    3188                 :             :      from NEW_TYPE.  */
    3189                 :      142700 :   if (op_type != new_type)
    3190                 :       25731 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3191                 :             :                                         pattern_stmt, op_vectype);
    3192                 :             : 
    3193                 :             :   /* Promote the result to the original type.  */
    3194                 :      142700 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3195                 :             :                                       pattern_stmt, new_vectype);
    3196                 :             : 
    3197                 :      142700 :   return pattern_stmt;
    3198                 :     1285512 : }
    3199                 :             : 
    3200                 :             : /* Recognize the following patterns:
    3201                 :             : 
    3202                 :             :      ATYPE a;  // narrower than TYPE
    3203                 :             :      BTYPE b;  // narrower than TYPE
    3204                 :             : 
    3205                 :             :    1) Multiply high with scaling
    3206                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3207                 :             :      Here, c is bitsize (TYPE) / 2 - 1.
    3208                 :             : 
    3209                 :             :    2) ... or also with rounding
    3210                 :             :      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
    3211                 :             :      Here, d is bitsize (TYPE) / 2 - 2.
    3212                 :             : 
    3213                 :             :    3) Normal multiply high
    3214                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3215                 :             :      Here, e is bitsize (TYPE) / 2.
    3216                 :             : 
    3217                 :             :    where only the bottom half of res is used.  */
    3218                 :             : 
    3219                 :             : static gimple *
    3220                 :    29848394 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3221                 :             :                           stmt_vec_info last_stmt_info, tree *type_out)
    3222                 :             : {
    3223                 :             :   /* Check for a right shift.  */
    3224                 :    29848394 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3225                 :    20419203 :   if (!last_stmt
    3226                 :    20419203 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3227                 :             :     return NULL;
    3228                 :             : 
    3229                 :             :   /* Check that the shift result is wider than the users of the
    3230                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3231                 :      344435 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3232                 :      344435 :   unsigned int target_precision
    3233                 :      344435 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3234                 :      344435 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3235                 :      344435 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3236                 :             :     return NULL;
    3237                 :             : 
    3238                 :             :   /* Look through any change in sign on the outer shift input.  */
    3239                 :       46820 :   vect_unpromoted_value unprom_rshift_input;
    3240                 :       46820 :   tree rshift_input = vect_look_through_possible_promotion
    3241                 :       46820 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3242                 :       46820 :   if (!rshift_input
    3243                 :       46820 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3244                 :       46212 :            != TYPE_PRECISION (lhs_type))
    3245                 :             :     return NULL;
    3246                 :             : 
    3247                 :             :   /* Get the definition of the shift input.  */
    3248                 :       44249 :   stmt_vec_info rshift_input_stmt_info
    3249                 :       44249 :     = vect_get_internal_def (vinfo, rshift_input);
    3250                 :       44249 :   if (!rshift_input_stmt_info)
    3251                 :             :     return NULL;
    3252                 :       39835 :   gassign *rshift_input_stmt
    3253                 :    29882816 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3254                 :       34509 :   if (!rshift_input_stmt)
    3255                 :             :     return NULL;
    3256                 :             : 
    3257                 :       34509 :   stmt_vec_info mulh_stmt_info;
    3258                 :       34509 :   tree scale_term;
    3259                 :       34509 :   bool rounding_p = false;
    3260                 :             : 
    3261                 :             :   /* Check for the presence of the rounding term.  */
    3262                 :       41444 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3263                 :             :     {
    3264                 :             :       /* Check that the outer shift was by 1.  */
    3265                 :       17704 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3266                 :        8798 :         return NULL;
    3267                 :             : 
    3268                 :             :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3269                 :        1338 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3270                 :             :         return NULL;
    3271                 :             : 
    3272                 :             :       /* Look through any change in sign on the addition input.  */
    3273                 :          97 :       vect_unpromoted_value unprom_plus_input;
    3274                 :          97 :       tree plus_input = vect_look_through_possible_promotion
    3275                 :          97 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3276                 :          97 :       if (!plus_input
    3277                 :          97 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3278                 :          97 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3279                 :             :         return NULL;
    3280                 :             : 
    3281                 :             :       /* Get the definition of the multiply-high-scale part.  */
    3282                 :          97 :       stmt_vec_info plus_input_stmt_info
    3283                 :          97 :         = vect_get_internal_def (vinfo, plus_input);
    3284                 :          97 :       if (!plus_input_stmt_info)
    3285                 :             :         return NULL;
    3286                 :          97 :       gassign *plus_input_stmt
    3287                 :        8895 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3288                 :          97 :       if (!plus_input_stmt
    3289                 :          97 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3290                 :             :         return NULL;
    3291                 :             : 
    3292                 :             :       /* Look through any change in sign on the scaling input.  */
    3293                 :          54 :       vect_unpromoted_value unprom_scale_input;
    3294                 :          54 :       tree scale_input = vect_look_through_possible_promotion
    3295                 :          54 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3296                 :          54 :       if (!scale_input
    3297                 :          54 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3298                 :          54 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3299                 :             :         return NULL;
    3300                 :             : 
    3301                 :             :       /* Get the definition of the multiply-high part.  */
    3302                 :          54 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3303                 :          54 :       if (!mulh_stmt_info)
    3304                 :             :         return NULL;
    3305                 :             : 
    3306                 :             :       /* Get the scaling term.  */
    3307                 :          54 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3308                 :          54 :       rounding_p = true;
    3309                 :             :     }
    3310                 :             :   else
    3311                 :             :     {
    3312                 :       25657 :       mulh_stmt_info = rshift_input_stmt_info;
    3313                 :       25657 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3314                 :             :     }
    3315                 :             : 
    3316                 :             :   /* Check that the scaling factor is constant.  */
    3317                 :       25711 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3318                 :             :     return NULL;
    3319                 :             : 
    3320                 :             :   /* Check whether the scaling input term can be seen as two widened
    3321                 :             :      inputs multiplied together.  */
    3322                 :       74205 :   vect_unpromoted_value unprom_mult[2];
    3323                 :       24735 :   tree new_type;
    3324                 :       24735 :   unsigned int nops
    3325                 :       24735 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3326                 :             :                             false, 2, unprom_mult, &new_type);
    3327                 :       24735 :   if (nops != 2)
    3328                 :             :     return NULL;
    3329                 :             : 
    3330                 :             :   /* Adjust output precision.  */
    3331                 :        4466 :   if (TYPE_PRECISION (new_type) < target_precision)
    3332                 :           0 :     new_type = build_nonstandard_integer_type
    3333                 :           0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3334                 :             : 
    3335                 :        4466 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3336                 :        4466 :   internal_fn ifn;
    3337                 :             :   /* Check that the scaling factor is expected.  Instead of
    3338                 :             :      target_precision, we should use the one that we actually
    3339                 :             :      use for internal function.  */
    3340                 :        4466 :   if (rounding_p)
    3341                 :             :     {
    3342                 :             :       /* Check pattern 2).  */
    3343                 :         108 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3344                 :         162 :           != TYPE_PRECISION (lhs_type))
    3345                 :             :         return NULL;
    3346                 :             : 
    3347                 :             :       ifn = IFN_MULHRS;
    3348                 :             :     }
    3349                 :             :   else
    3350                 :             :     {
    3351                 :             :       /* Check for pattern 1).  */
    3352                 :        8824 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3353                 :       13236 :           == TYPE_PRECISION (lhs_type))
    3354                 :             :         ifn = IFN_MULHS;
    3355                 :             :       /* Check for pattern 3).  */
    3356                 :        4368 :       else if (wi::to_widest (scale_term) + mult_precision
    3357                 :        8736 :                == TYPE_PRECISION (lhs_type))
    3358                 :             :         ifn = IFN_MULH;
    3359                 :             :       else
    3360                 :             :         return NULL;
    3361                 :             :     }
    3362                 :             : 
    3363                 :        4394 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3364                 :             : 
    3365                 :             :   /* Check for target support.  */
    3366                 :        4394 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3367                 :        4394 :   if (!new_vectype
    3368                 :        8774 :       || !direct_internal_fn_supported_p
    3369                 :        4380 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3370                 :        4307 :     return NULL;
    3371                 :             : 
    3372                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3373                 :             :      it's likely to be discarded.  */
    3374                 :          87 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3375                 :          87 :   if (!*type_out)
    3376                 :             :     return NULL;
    3377                 :             : 
    3378                 :             :   /* Generate the IFN_MULHRS call.  */
    3379                 :          87 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3380                 :          87 :   tree new_ops[2];
    3381                 :          87 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3382                 :             :                        unprom_mult, new_vectype);
    3383                 :          87 :   gcall *mulhrs_stmt
    3384                 :          87 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3385                 :          87 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3386                 :          87 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3387                 :             : 
    3388                 :          87 :   if (dump_enabled_p ())
    3389                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3390                 :             :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3391                 :             : 
    3392                 :          87 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3393                 :          87 :                               mulhrs_stmt, new_vectype);
    3394                 :             : }
    3395                 :             : 
    3396                 :             : /* Recognize the patterns:
    3397                 :             : 
    3398                 :             :             ATYPE a;  // narrower than TYPE
    3399                 :             :             BTYPE b;  // narrower than TYPE
    3400                 :             :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3401                 :             :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3402                 :             : 
    3403                 :             :    where only the bottom half of avg is used.  Try to transform them into:
    3404                 :             : 
    3405                 :             :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3406                 :             :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3407                 :             : 
    3408                 :             :   followed by:
    3409                 :             : 
    3410                 :             :             TYPE avg = (TYPE) avg';
    3411                 :             : 
    3412                 :             :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3413                 :             :   of avg is used, all or part of the cast of avg' should become redundant.
    3414                 :             : 
    3415                 :             :   If there is no target support available, generate code to distribute rshift
    3416                 :             :   over plus and add a carry.  This fallback is only used when NTYPE
                         :             :   (NEW_TYPE in the code) is unsigned and the target provides the vector
                         :             :   shift, add, IOR and AND operations that it needs.
                         :             : 
                         :             :   LAST_STMT_INFO is the statement to test as the right shift.  On success,
                         :             :   set *TYPE_OUT to the vector type for TYPE and return the result of
                         :             :   vect_convert_output for the new pattern statement; return NULL if the
                         :             :   pattern does not match or cannot be vectorized.  */
    3417                 :             : 
    3418                 :             : static gimple *
    3419                 :    29846657 : vect_recog_average_pattern (vec_info *vinfo,
    3420                 :             :                             stmt_vec_info last_stmt_info, tree *type_out)
    3421                 :             : {
    3422                 :             :   /* Check for a shift right by one bit.  */
    3423                 :    29846657 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3424                 :    20417600 :   if (!last_stmt
    3425                 :    20417600 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3426                 :      344303 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3427                 :    29793657 :     return NULL;
    3428                 :             : 
    3429                 :             :   /* Check that the shift result is wider than the users of the
    3430                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3431                 :       53000 :   tree lhs = gimple_assign_lhs (last_stmt);
    3432                 :       53000 :   tree type = TREE_TYPE (lhs);
    3433                 :       53000 :   unsigned int target_precision
    3434                 :       53000 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3435                 :       53000 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3436                 :             :     return NULL;
    3437                 :             : 
    3438                 :             :   /* Look through any change in sign on the shift input.  */
    3439                 :        2358 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3440                 :        2358 :   vect_unpromoted_value unprom_plus;
    3441                 :        2358 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3442                 :             :                                                      &unprom_plus);
    3443                 :        2358 :   if (!rshift_rhs
    3444                 :        2358 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3445                 :             :     return NULL;
    3446                 :             : 
    3447                 :             :   /* Get the definition of the shift input.  */
    3448                 :        2348 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3449                 :        2348 :   if (!plus_stmt_info)
    3450                 :             :     return NULL;
    3451                 :             : 
    3452                 :             :   /* Check whether the shift input can be seen as a tree of additions on
    3453                 :             :      2 or 3 widened inputs.
    3454                 :             : 
    3455                 :             :      Note that the pattern should be a win even if the result of one or
    3456                 :             :      more additions is reused elsewhere: if the pattern matches, we'd be
    3457                 :             :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3458                 :        9320 :   internal_fn ifn = IFN_AVG_FLOOR;
    3459                 :        9320 :   vect_unpromoted_value unprom[3];
    3460                 :        2330 :   tree new_type;
    3461                 :        2330 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3462                 :        2330 :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3463                 :             :                                             unprom, &new_type);
    3464                 :        2330 :   if (nops == 0)
    3465                 :             :     return NULL;
    3466                 :         924 :   if (nops == 3)
    3467                 :             :     {
    3468                 :             :       /* Check that one operand is 1; it is the "+ 1" of pattern (2).  */
    3469                 :             :       unsigned int i;
    3470                 :         957 :       for (i = 0; i < 3; ++i)
    3471                 :         897 :         if (integer_onep (unprom[i].op))
    3472                 :             :           break;
    3473                 :         299 :       if (i == 3)
    3474                 :             :         return NULL;
    3475                 :             :       /* Throw away the 1 operand and keep the other two, which end up
                         :             :          in UNPROM[0] and UNPROM[1].  */
    3476                 :         239 :       if (i < 2)
    3477                 :           0 :         unprom[i] = unprom[2];
    3478                 :             :       ifn = IFN_AVG_CEIL;
    3479                 :             :     }
    3480                 :             : 
    3481                 :         864 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3482                 :             : 
    3483                 :             :   /* We know that:
    3484                 :             : 
    3485                 :             :      (a) the operation can be viewed as:
    3486                 :             : 
    3487                 :             :            TYPE widened0 = (TYPE) UNPROM[0];
    3488                 :             :            TYPE widened1 = (TYPE) UNPROM[1];
    3489                 :             :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3490                 :             :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3491                 :             : 
    3492                 :             :      (b) the first two statements are equivalent to:
    3493                 :             : 
    3494                 :             :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3495                 :             :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3496                 :             : 
    3497                 :             :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3498                 :             :          where sensible;
    3499                 :             : 
    3500                 :             :      (d) all the operations can be performed correctly at twice the width of
    3501                 :             :          NEW_TYPE, due to the nature of the average operation; and
    3502                 :             : 
    3503                 :             :      (e) users of the result of the right shift need only TARGET_PRECISION
    3504                 :             :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3505                 :             :          precision.
    3506                 :             : 
    3507                 :             :      Under these circumstances, the only situation in which NEW_TYPE
    3508                 :             :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3509                 :             :      and an addition result are all used more than once.  Thus we can
    3510                 :             :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3511                 :             :      as "free", whereas widening the result of the average instruction
    3512                 :             :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3513                 :             :      therefore better not to go narrower than TARGET_PRECISION.  */
    3514                 :         864 :   if (TYPE_PRECISION (new_type) < target_precision)
    3515                 :           8 :     new_type = build_nonstandard_integer_type (target_precision,
    3516                 :           8 :                                                TYPE_UNSIGNED (new_type));
    3517                 :             : 
    3518                 :             :   /* Check for target support.  Both the IFN call and the open-coded
                         :             :      fallback need a vector type for NEW_TYPE.  */
    3519                 :         864 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3520                 :         864 :   if (!new_vectype)
    3521                 :             :     return NULL;
    3522                 :             : 
    3523                 :         864 :   bool fallback_p = false;
    3524                 :             : 
    3525                 :         864 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3526                 :             :     ;
    3527                 :         730 :   else if (TYPE_UNSIGNED (new_type)
    3528                 :         267 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3529                 :         267 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3530                 :         267 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3531                 :         997 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3532                 :             :     fallback_p = true;
    3533                 :             :   else
    3534                 :         463 :     return NULL;
    3535                 :             : 
    3536                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3537                 :             :      it's likely to be discarded.  */
    3538                 :         401 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3539                 :         401 :   if (!*type_out)
    3540                 :             :     return NULL;
    3541                 :             : 
    3542                 :         400 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3543                 :         400 :   tree new_ops[2];
    3544                 :         400 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3545                 :             :                        unprom, new_vectype);
    3546                 :             : 
    3547                 :         400 :   if (fallback_p)
    3548                 :             :     {
    3549                 :             :       /* As a fallback (NEW_TYPE is unsigned here), generate code for the
                         :             :          following sequence:
    3550                 :             : 
    3551                 :             :          shifted_op0 = new_ops[0] >> 1;
    3552                 :             :          shifted_op1 = new_ops[1] >> 1;
    3553                 :             :          sum_of_shifted = shifted_op0 + shifted_op1;
    3554                 :             :          unmasked_carry = new_ops[0] and/or new_ops[1];
                         :             :                           (IOR for AVG_CEIL, AND for AVG_FLOOR)
    3555                 :             :          carry = unmasked_carry & 1;
    3556                 :             :          new_var = sum_of_shifted + carry;
    3557                 :             :       */
    3558                 :             : 
    3559                 :         266 :       tree one_cst = build_one_cst (new_type);
    3560                 :         266 :       gassign *g;
    3561                 :             : 
    3562                 :         266 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3563                 :         266 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3564                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3565                 :             : 
    3566                 :         266 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3567                 :         266 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3568                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3569                 :             : 
    3570                 :         266 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3571                 :         266 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3572                 :             :                                shifted_op0, shifted_op1);
    3573                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3574                 :             : 
    3575                 :         266 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3576                 :         266 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3577                 :         266 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3578                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3579                 :             : 
    3580                 :         266 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3581                 :         266 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3582                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3583                 :             : 
    3584                 :         266 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3585                 :         266 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3586                 :             :     }
    3587                 :             : 
    3588                 :             :   /* Generate the IFN_AVG* call.  */
    3589                 :         134 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3590                 :             :                                                     new_ops[1]);
    3591                 :         134 :   gimple_call_set_lhs (average_stmt, new_var);
    3592                 :         134 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3593                 :             : 
    3594                 :         134 :   if (dump_enabled_p ())
    3595                 :          42 :     dump_printf_loc (MSG_NOTE, vect_location,
    3596                 :             :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3597                 :             : 
    3598                 :         134 :   return vect_convert_output (vinfo, last_stmt_info,
    3599                 :         134 :                               type, average_stmt, new_vectype);
    3600                 :             : }
    3601                 :             : 
    3602                 :             : /* Recognize cases in which the input to a cast is wider than its
    3603                 :             :    output, and the input is fed by a widening operation.  Fold this
    3604                 :             :    by removing the unnecessary intermediate widening.  E.g.:
    3605                 :             : 
    3606                 :             :      unsigned char a;
    3607                 :             :      unsigned int b = (unsigned int) a;
    3608                 :             :      unsigned short c = (unsigned short) b;
    3609                 :             : 
    3610                 :             :    -->
    3611                 :             : 
    3612                 :             :      unsigned short c = (unsigned short) a;
    3613                 :             : 
    3614                 :             :    Although this is rare in input IR, it is an expected side-effect
    3615                 :             :    of the over-widening pattern above.
    3616                 :             : 
    3617                 :             :    This is beneficial also for integer-to-float conversions, if the
    3618                 :             :    widened integer has more bits than the float, and if the unwidened
    3619                 :             :    input doesn't.
                         :             : 
                         :             :    LAST_STMT_INFO is the statement to test as the cast.  On success,
                         :             :    set *TYPE_OUT to the vector type for the type of the cast result and
                         :             :    return a new statement that applies the same conversion code directly
                         :             :    to the unpromoted input; return NULL if the pattern does not match or
                         :             :    no such vector type exists.  */
    3620                 :             : 
    3621                 :             : static gimple *
    3622                 :    29848394 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3623                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3624                 :             : {
    3625                 :             :   /* Check for a cast, including an integer-to-float conversion.  */
    3626                 :    50218733 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3627                 :    20419116 :   if (!last_stmt)
    3628                 :             :     return NULL;
    3629                 :    20419116 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3630                 :    20419116 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3631                 :             :     return NULL;
    3632                 :             : 
    3633                 :             :   /* Make sure that the lhs is a non-boolean scalar whose type has a
                         :             :      scalar mode.  */
    3634                 :     2886066 :   tree lhs = gimple_assign_lhs (last_stmt);
    3635                 :     2886066 :   if (!lhs)
    3636                 :             :     return NULL;
    3637                 :     2886066 :   tree lhs_type = TREE_TYPE (lhs);
    3638                 :     2886066 :   scalar_mode lhs_mode;
    3639                 :     2867653 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3640                 :     5751989 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3641                 :       23695 :     return NULL;
    3642                 :             : 
    3643                 :             :   /* Check for a narrowing operation (from a vector point of view):
                         :             :      the rhs must be a non-boolean integer with more bits of precision
                         :             :      than the bitsize of the lhs mode.  */
    3644                 :     2862371 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3645                 :     2862371 :   tree rhs_type = TREE_TYPE (rhs);
    3646                 :     2862371 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3647                 :     2575251 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3648                 :     7834097 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3649                 :             :     return NULL;
    3650                 :             : 
    3651                 :             :   /* Try to find an unpromoted input that is narrower than the rhs.  */
    3652                 :      372474 :   vect_unpromoted_value unprom;
    3653                 :      372474 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3654                 :      372474 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3655                 :             :     return NULL;
    3656                 :             : 
    3657                 :             :   /* If the bits above RHS_TYPE matter, make sure that they're the
    3658                 :             :      same when extending from UNPROM as they are when extending from RHS.
                         :             :      The code treats them as mattering when LHS_TYPE is not integral,
                         :             :      and then requires RHS_TYPE and UNPROM to have the same sign.  */
    3659                 :       48907 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3660                 :       48907 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3661                 :             :     return NULL;
    3662                 :             : 
    3663                 :             :   /* We can get the same result by casting UNPROM directly, to avoid
    3664                 :             :      the unnecessary widening and narrowing.  */
    3665                 :       48777 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3666                 :             : 
    3667                 :       48777 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3668                 :       48777 :   if (!*type_out)
    3669                 :             :     return NULL;
    3670                 :             : 
    3671                 :       48777 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3672                 :       48777 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3673                 :       48777 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3674                 :             : 
    3675                 :       48777 :   return pattern_stmt;
    3676                 :             : }
    3677                 :             : 
    3678                 :             : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3679                 :             :    to WIDEN_LSHIFT_EXPR.  This is a thin wrapper that forwards VINFO,
                         :             :    LAST_STMT_INFO and TYPE_OUT unchanged to vect_recog_widen_op_pattern,
                         :             :    together with the two shift codes and this pattern's name, and returns
                         :             :    whatever that function returns.  See vect_recog_widen_op_pattern for
                         :             :    details.  */
    3680                 :             : 
    3681                 :             : static gimple *
    3682                 :    29776169 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3683                 :             :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3684                 :             : {
    3685                 :    29776169 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3686                 :    29776169 :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3687                 :    29776169 :                                       "vect_recog_widen_shift_pattern");
    3688                 :             : }
    3689                 :             : 
    3690                 :             : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
    3691                 :             : 
    3692                 :             :    type a_t, b_t, c_t;
    3693                 :             : 
    3694                 :             :    S0 a_t = b_t r<< c_t;
    3695                 :             : 
    3696                 :             :   Input/Output:
    3697                 :             : 
    3698                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    3699                 :             :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3700                 :             :     with a sequence:
    3701                 :             : 
    3702                 :             :    S1 d_t = -c_t;
    3703                 :             :    S2 e_t = d_t & (B - 1);
    3704                 :             :    S3 f_t = b_t << c_t;
    3705                 :             :    S4 g_t = b_t >> e_t;
    3706                 :             :    S0 a_t = f_t | g_t;
    3707                 :             : 
    3708                 :             :     where B is element bitsize of type.
    3709                 :             : 
    3710                 :             :   Output:
    3711                 :             : 
    3712                 :             :   * TYPE_OUT: The type of the output of this pattern.
    3713                 :             : 
    3714                 :             :   * Return value: A new stmt that will be used to replace the rotate
    3715                 :             :     S0 stmt.  */
    3716                 :             : 
    3717                 :             : static gimple *
    3718                 :    29776169 : vect_recog_rotate_pattern (vec_info *vinfo,
    3719                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3720                 :             : {
    3721                 :    29776169 :   gimple *last_stmt = stmt_vinfo->stmt;
    3722                 :    29776169 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3723                 :    29776169 :   gimple *pattern_stmt, *def_stmt;
    3724                 :    29776169 :   enum tree_code rhs_code;
    3725                 :    29776169 :   enum vect_def_type dt;
    3726                 :    29776169 :   optab optab1, optab2;
    3727                 :    29776169 :   edge ext_def = NULL;
    3728                 :    29776169 :   bool bswap16_p = false;
    3729                 :             : 
    3730                 :    29776169 :   if (is_gimple_assign (last_stmt))
    3731                 :             :     {
    3732                 :    20346865 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3733                 :    20346865 :       switch (rhs_code)
    3734                 :             :         {
    3735                 :        5627 :         case LROTATE_EXPR:
    3736                 :        5627 :         case RROTATE_EXPR:
    3737                 :        5627 :           break;
    3738                 :             :         default:
    3739                 :             :           return NULL;
    3740                 :             :         }
    3741                 :             : 
    3742                 :        5627 :       lhs = gimple_assign_lhs (last_stmt);
    3743                 :        5627 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3744                 :        5627 :       type = TREE_TYPE (oprnd0);
    3745                 :        5627 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3746                 :             :     }
    3747                 :     9429304 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3748                 :             :     {
    3749                 :             :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3750                 :             :          The vectorizer has bswap support, but only if the argument isn't
    3751                 :             :          promoted.  */
    3752                 :         150 :       lhs = gimple_call_lhs (last_stmt);
    3753                 :         150 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3754                 :         150 :       type = TREE_TYPE (oprnd0);
    3755                 :         150 :       if (!lhs
    3756                 :         150 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3757                 :         150 :           || TYPE_PRECISION (type) <= 16
    3758                 :           0 :           || TREE_CODE (oprnd0) != SSA_NAME
    3759                 :         150 :           || BITS_PER_UNIT != 8)
    3760                 :         150 :         return NULL;
    3761                 :             : 
    3762                 :           0 :       stmt_vec_info def_stmt_info;
    3763                 :           0 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3764                 :             :         return NULL;
    3765                 :             : 
    3766                 :           0 :       if (dt != vect_internal_def)
    3767                 :             :         return NULL;
    3768                 :             : 
    3769                 :           0 :       if (gimple_assign_cast_p (def_stmt))
    3770                 :             :         {
    3771                 :           0 :           def = gimple_assign_rhs1 (def_stmt);
    3772                 :           0 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3773                 :           0 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3774                 :             :             oprnd0 = def;
    3775                 :             :         }
    3776                 :             : 
    3777                 :           0 :       type = TREE_TYPE (lhs);
    3778                 :           0 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3779                 :           0 :       if (vectype == NULL_TREE)
    3780                 :             :         return NULL;
    3781                 :             : 
    3782                 :           0 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3783                 :             :         {
    3784                 :             :           /* The encoding uses one stepped pattern for each byte in the
    3785                 :             :              16-bit word.  */
    3786                 :           0 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3787                 :           0 :           for (unsigned i = 0; i < 3; ++i)
    3788                 :           0 :             for (unsigned j = 0; j < 2; ++j)
    3789                 :           0 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3790                 :             : 
    3791                 :           0 :           vec_perm_indices indices (elts, 1,
    3792                 :           0 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3793                 :           0 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3794                 :           0 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3795                 :             :             {
    3796                 :             :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3797                 :             :                  undo the argument promotion.  */
    3798                 :           0 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3799                 :             :                 {
    3800                 :           0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3801                 :           0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3802                 :           0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3803                 :           0 :                   oprnd0 = def;
    3804                 :             :                 }
    3805                 :             : 
    3806                 :             :               /* Pattern detected.  */
    3807                 :           0 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3808                 :             : 
    3809                 :           0 :               *type_out = vectype;
    3810                 :             : 
    3811                 :             :               /* Pattern supported.  Create a stmt to be used to replace the
    3812                 :             :                  pattern, with the unpromoted argument.  */
    3813                 :           0 :               var = vect_recog_temp_ssa_var (type, NULL);
    3814                 :           0 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3815                 :             :                                                 1, oprnd0);
    3816                 :           0 :               gimple_call_set_lhs (pattern_stmt, var);
    3817                 :           0 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3818                 :             :                                       gimple_call_fntype (last_stmt));
    3819                 :           0 :               return pattern_stmt;
    3820                 :             :             }
    3821                 :           0 :         }
    3822                 :             : 
    3823                 :           0 :       oprnd1 = build_int_cst (integer_type_node, 8);
    3824                 :           0 :       rhs_code = LROTATE_EXPR;
    3825                 :           0 :       bswap16_p = true;
    3826                 :             :     }
    3827                 :             :   else
    3828                 :             :     return NULL;
    3829                 :             : 
    3830                 :        5627 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3831                 :        5507 :       || !INTEGRAL_TYPE_P (type)
    3832                 :       11061 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3833                 :             :     return NULL;
    3834                 :             : 
    3835                 :        5434 :   stmt_vec_info def_stmt_info;
    3836                 :        5434 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3837                 :             :     return NULL;
    3838                 :             : 
    3839                 :        5434 :   if (dt != vect_internal_def
    3840                 :        5228 :       && dt != vect_constant_def
    3841                 :          21 :       && dt != vect_external_def)
    3842                 :             :     return NULL;
    3843                 :             : 
    3844                 :        5428 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3845                 :        5428 :   if (vectype == NULL_TREE)
    3846                 :             :     return NULL;
    3847                 :             : 
    3848                 :             :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3849                 :             :      don't do anything here.  */
    3850                 :        5201 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3851                 :        5201 :   if (optab1
    3852                 :        5201 :       && can_implement_p (optab1, TYPE_MODE (vectype)))
    3853                 :             :     {
    3854                 :          36 :      use_rotate:
    3855                 :          36 :       if (bswap16_p)
    3856                 :             :         {
    3857                 :           0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3858                 :             :             {
    3859                 :           0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3860                 :           0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3861                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3862                 :           0 :               oprnd0 = def;
    3863                 :             :             }
    3864                 :             : 
    3865                 :             :           /* Pattern detected.  */
    3866                 :           0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3867                 :             : 
    3868                 :           0 :           *type_out = vectype;
    3869                 :             : 
    3870                 :             :           /* Pattern supported.  Create a stmt to be used to replace the
    3871                 :             :              pattern.  */
    3872                 :           0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3873                 :           0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3874                 :             :                                               oprnd1);
    3875                 :           0 :           return pattern_stmt;
    3876                 :             :         }
    3877                 :             :       return NULL;
    3878                 :             :     }
    3879                 :             : 
    3880                 :        5165 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3881                 :             :     {
    3882                 :        5117 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3883                 :        5117 :       if (optab2
    3884                 :        5117 :           && can_implement_p (optab2, TYPE_MODE (vectype)))
    3885                 :           0 :         goto use_rotate;
    3886                 :             :     }
    3887                 :             : 
    3888                 :        5165 :   tree utype = unsigned_type_for (type);
    3889                 :        5165 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3890                 :        5165 :   if (!uvectype)
    3891                 :             :     return NULL;
    3892                 :             : 
    3893                 :             :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3894                 :             :      don't do anything here either.  */
    3895                 :        5165 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3896                 :        5165 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3897                 :        5165 :   if (!optab1
    3898                 :        5165 :       || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3899                 :         519 :       || !optab2
    3900                 :        5684 :       || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3901                 :             :     {
    3902                 :        4646 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3903                 :             :         return NULL;
    3904                 :        4611 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3905                 :        4611 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3906                 :        4611 :       if (!optab1
    3907                 :        4611 :           || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3908                 :        3410 :           || !optab2
    3909                 :        8021 :           || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3910                 :        1201 :         return NULL;
    3911                 :             :     }
    3912                 :             : 
    3913                 :        3929 :   *type_out = vectype;
    3914                 :             : 
    3915                 :        3929 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
    3916                 :             :     {
    3917                 :          49 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3918                 :          49 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3919                 :          49 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3920                 :          49 :       oprnd0 = def;
    3921                 :             :     }
    3922                 :             : 
    3923                 :        3929 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3924                 :          13 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3925                 :             : 
    3926                 :        3929 :   def = NULL_TREE;
    3927                 :        3929 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3928                 :        3929 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3929                 :             :     def = oprnd1;
    3930                 :          28 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3931                 :             :     {
    3932                 :           0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3933                 :           0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3934                 :           0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3935                 :           0 :              == TYPE_PRECISION (type))
    3936                 :             :         def = rhs1;
    3937                 :             :     }
    3938                 :             : 
    3939                 :        3901 :   if (def == NULL_TREE)
    3940                 :             :     {
    3941                 :          28 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3942                 :          28 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3943                 :          28 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3944                 :             :     }
    3945                 :        3929 :   stype = TREE_TYPE (def);
    3946                 :             : 
    3947                 :        3929 :   if (TREE_CODE (def) == INTEGER_CST)
    3948                 :             :     {
    3949                 :        3817 :       if (!tree_fits_uhwi_p (def)
    3950                 :        3817 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3951                 :        7634 :           || integer_zerop (def))
    3952                 :           0 :         return NULL;
    3953                 :        3817 :       def2 = build_int_cst (stype,
    3954                 :        3817 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3955                 :             :     }
    3956                 :             :   else
    3957                 :             :     {
    3958                 :         112 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    3959                 :             : 
    3960                 :         112 :       if (vecstype == NULL_TREE)
    3961                 :             :         return NULL;
    3962                 :         112 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3963                 :         112 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    3964                 :         112 :       if (ext_def)
    3965                 :             :         {
    3966                 :          13 :           basic_block new_bb
    3967                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3968                 :          13 :           gcc_assert (!new_bb);
    3969                 :             :         }
    3970                 :             :       else
    3971                 :          99 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3972                 :             : 
    3973                 :         112 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3974                 :         112 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    3975                 :         112 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    3976                 :             :                                       gimple_assign_lhs (def_stmt), mask);
    3977                 :         112 :       if (ext_def)
    3978                 :             :         {
    3979                 :          13 :           basic_block new_bb
    3980                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3981                 :          13 :           gcc_assert (!new_bb);
    3982                 :             :         }
    3983                 :             :       else
    3984                 :          99 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3985                 :             :     }
    3986                 :             : 
    3987                 :        3929 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    3988                 :        7771 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    3989                 :             :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    3990                 :             :                                   oprnd0, def);
    3991                 :        3929 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3992                 :             : 
    3993                 :        3929 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    3994                 :        7771 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    3995                 :             :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    3996                 :             :                                   oprnd0, def2);
    3997                 :        3929 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3998                 :             : 
    3999                 :             :   /* Pattern detected.  */
    4000                 :        3929 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    4001                 :             : 
    4002                 :             :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4003                 :        3929 :   var = vect_recog_temp_ssa_var (utype, NULL);
    4004                 :        3929 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    4005                 :             : 
    4006                 :        3929 :   if (!useless_type_conversion_p (type, utype))
    4007                 :             :     {
    4008                 :          49 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    4009                 :          49 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    4010                 :          49 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    4011                 :             :     }
    4012                 :             :   return pattern_stmt;
    4013                 :             : }
    4014                 :             : 
    4015                 :             : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    4016                 :             :    vectorized:
    4017                 :             : 
    4018                 :             :    type a_t;
    4019                 :             :    TYPE b_T, res_T;
    4020                 :             : 
    4021                 :             :    S1 a_t = ;
    4022                 :             :    S2 b_T = ;
    4023                 :             :    S3 res_T = b_T op a_t;
    4024                 :             : 
    4025                 :             :   where type 'TYPE' is a type with different size than 'type',
    4026                 :             :   and op is <<, >> or rotate.
    4027                 :             : 
    4028                 :             :   Also detect cases:
    4029                 :             : 
    4030                 :             :    type a_t;
    4031                 :             :    TYPE b_T, c_T, res_T;
    4032                 :             : 
    4033                 :             :    S0 c_T = ;
    4034                 :             :    S1 a_t = (type) c_T;
    4035                 :             :    S2 b_T = ;
    4036                 :             :    S3 res_T = b_T op a_t;
    4037                 :             : 
    4038                 :             :   Input/Output:
    4039                 :             : 
    4040                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4041                 :             :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4042                 :             :     with a shift/rotate which has same type on both operands, in the
    4043                 :             :     second case just b_T op c_T, in the first case with added cast
    4044                 :             :     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
    4045                 :             : 
    4046                 :             :   Output:
    4047                 :             : 
    4048                 :             :   * TYPE_OUT: The type of the output of this pattern.
    4049                 :             : 
    4050                 :             :   * Return value: A new stmt that will be used to replace the shift/rotate
    4051                 :             :     S3 stmt.  */
    4052                 :             : 
    4053                 :             : static gimple *
    4054                 :    29780422 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4055                 :             :                                         stmt_vec_info stmt_vinfo,
    4056                 :             :                                         tree *type_out)
    4057                 :             : {
    4058                 :    29780422 :   gimple *last_stmt = stmt_vinfo->stmt;
    4059                 :    29780422 :   tree oprnd0, oprnd1, lhs, var;
    4060                 :    29780422 :   gimple *pattern_stmt;
    4061                 :    29780422 :   enum tree_code rhs_code;
    4062                 :             : 
    4063                 :    29780422 :   if (!is_gimple_assign (last_stmt))
    4064                 :             :     return NULL;
    4065                 :             : 
    4066                 :    20351118 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4067                 :    20351118 :   switch (rhs_code)
    4068                 :             :     {
    4069                 :      520544 :     case LSHIFT_EXPR:
    4070                 :      520544 :     case RSHIFT_EXPR:
    4071                 :      520544 :     case LROTATE_EXPR:
    4072                 :      520544 :     case RROTATE_EXPR:
    4073                 :      520544 :       break;
    4074                 :             :     default:
    4075                 :             :       return NULL;
    4076                 :             :     }
    4077                 :             : 
    4078                 :      520544 :   lhs = gimple_assign_lhs (last_stmt);
    4079                 :      520544 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4080                 :      520544 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4081                 :      520544 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4082                 :      469784 :       || TREE_CODE (oprnd1) != SSA_NAME
    4083                 :       47518 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4084                 :       18672 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4085                 :       18390 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4086                 :      538934 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4087                 :       18390 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4088                 :      502154 :     return NULL;
    4089                 :             : 
    4090                 :       18390 :   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
    4091                 :       18390 :   if (!def_vinfo)
    4092                 :             :     return NULL;
    4093                 :             : 
    4094                 :       16441 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4095                 :       16441 :   if (*type_out == NULL_TREE)
    4096                 :             :     return NULL;
    4097                 :             : 
    4098                 :       11084 :   tree def = NULL_TREE;
    4099                 :       11084 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4100                 :        9311 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4101                 :             :     {
    4102                 :        1948 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4103                 :        1948 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4104                 :        1948 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4105                 :         593 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4106                 :             :         {
    4107                 :         593 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4108                 :         593 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4109                 :             :             def = rhs1;
    4110                 :             :           else
    4111                 :             :             {
    4112                 :         588 :               tree mask
    4113                 :         588 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4114                 :         588 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4115                 :         588 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4116                 :         588 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4117                 :         588 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4118                 :         588 :                                                            TREE_TYPE (rhs1));
    4119                 :         588 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4120                 :             :             }
    4121                 :             :         }
    4122                 :             :     }
    4123                 :             : 
    4124                 :         593 :   if (def == NULL_TREE)
    4125                 :             :     {
    4126                 :       10491 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4127                 :       10491 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4128                 :       10491 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4129                 :             :     }
    4130                 :             : 
    4131                 :             :   /* Pattern detected.  */
    4132                 :       11084 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4133                 :             : 
    4134                 :             :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    4135                 :       11084 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4136                 :       11084 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4137                 :             : 
    4138                 :       11084 :   return pattern_stmt;
    4139                 :             : }
    4140                 :             : 
    4141                 :             : /* Return true iff the target has a vector optab implementing the operation
    4142                 :             :    CODE on type VECTYPE.  */
    4143                 :             : 
    4144                 :             : static bool
    4145                 :      583574 : target_has_vecop_for_code (tree_code code, tree vectype)
    4146                 :             : {
    4147                 :      583574 :   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
    4148                 :      583574 :   return voptab
    4149                 :      583574 :          && can_implement_p (voptab, TYPE_MODE (vectype));
    4150                 :             : }
    4151                 :             : 
    4152                 :             : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4153                 :             :    needed by the multiplication-by-immediate synthesis algorithm described by
    4154                 :             :    ALG and VAR.  SYNTH_SHIFT_P says that left shifts will be synthesized
    4155                 :             :    from additions, in which case vector addition must be present.
                         :             :    Steps 1 to ALG->ops - 1 are checked one by one: the add forms need a
                         :             :    vector PLUS_EXPR and the sub forms need a vector MINUS_EXPR.  The
                         :             :    first step (ALG->op[0]) must be alg_zero or alg_m.
                         :             :    Return true iff the target supports all the steps.  */
    4156                 :             : 
    4157                 :             : static bool
    4158                 :      258399 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4159                 :             :                                  tree vectype, bool synth_shift_p)
    4160                 :             : {
    4161                 :      258399 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4162                 :             :     return false;  /* Unexpected first step.  */
    4163                 :             : 
    4164                 :      258399 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4165                 :      258399 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4166                 :             : 
    4167                 :      258399 :   if (var == negate_variant
    4168                 :      258399 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4169                 :             :     return false;  /* The negate variant needs a vector NEGATE_EXPR.  */
    4170                 :             : 
    4171                 :             :   /* The add variant and shifts synthesized from additions both rely on
    4172                 :             :      vector addition, so make sure that it is available.  */
    4173                 :      257922 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4174                 :             :     return false;
    4175                 :             : 
    4176                 :      141846 :   for (int i = 1; i < alg->ops; i++)
    4177                 :             :     {
    4178                 :      110142 :       switch (alg->op[i])
    4179                 :             :         {
    4180                 :             :         case alg_shift:
    4181                 :             :           break;  /* Covered by the SYNTH_SHIFT_P check above.  */
    4182                 :       28691 :         case alg_add_t_m2:
    4183                 :       28691 :         case alg_add_t2_m:
    4184                 :       28691 :         case alg_add_factor:
    4185                 :       28691 :           if (!supports_vplus)
    4186                 :             :             return false;
    4187                 :             :           break;
    4188                 :       17997 :         case alg_sub_t_m2:
    4189                 :       17997 :         case alg_sub_t2_m:
    4190                 :       17997 :         case alg_sub_factor:
    4191                 :       17997 :           if (!supports_vminus)
    4192                 :             :             return false;
    4193                 :             :           break;
    4194                 :             :         case alg_unknown:
    4195                 :             :         case alg_m:
    4196                 :             :         case alg_zero:
    4197                 :             :         case alg_impossible:
    4198                 :             :           return false;  /* Rejected as a non-initial step.  */
    4199                 :           0 :         default:
    4200                 :           0 :           gcc_unreachable ();
    4201                 :             :         }
    4202                 :             :     }
    4203                 :             : 
    4204                 :             :   return true;
    4205                 :             : }
    4206                 :             : 
    4207                 :             : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
    4208                 :             :    putting the final result in DEST.  Each addition adds the previous result
    4209                 :             :    to itself, so AMNT additions are emitted in total.  All statements but the
                         :             :    last are appended to the pattern definition sequence of STMT_INFO (via
                         :             :    append_pattern_def_seq); intermediate results live in fresh temporaries
                         :             :    of the type of OP.  Return the last statement, which assigns to DEST.
                         :             :    NOTE(review): AMNT == 0 passes the assert below, but then the loop body
                         :             :    never runs and gcc_unreachable is hit; callers presumably always pass
                         :             :    a positive AMNT -- confirm.  */
    4210                 :             : 
    4211                 :             : static gimple *
    4212                 :           0 : synth_lshift_by_additions (vec_info *vinfo,
    4213                 :             :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4214                 :             :                            stmt_vec_info stmt_info)
    4215                 :             : {
    4216                 :           0 :   HOST_WIDE_INT i;
    4217                 :           0 :   tree itype = TREE_TYPE (op);
    4218                 :           0 :   tree prev_res = op;
    4219                 :           0 :   gcc_assert (amnt >= 0);
    4220                 :           0 :   for (i = 0; i < amnt; i++)
    4221                 :             :     {
    4222                 :           0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4223                 :             :                       : dest;  /* The final addition writes DEST.  */
    4224                 :           0 :       gimple *stmt
    4225                 :           0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4226                 :           0 :       prev_res = tmp_var;
    4227                 :           0 :       if (i < amnt - 1)
    4228                 :           0 :         append_pattern_def_seq (vinfo, stmt_info, stmt);
    4229                 :             :       else
    4230                 :           0 :         return stmt;  /* Last statement: returned, not appended.  */
    4231                 :             :     }
    4232                 :           0 :   gcc_unreachable ();  /* Only reached when AMNT is zero.  */
    4233                 :             :   return NULL;
    4234                 :             : }
    4235                 :             : 
    4236                 :             : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    4237                 :             :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    4238                 :             :    the process if necessary.  Append the resulting assignment statements
    4239                 :             :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    4240                 :             :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    4241                 :             :    left shifts using additions.  When OP2 is zero and CODE is LSHIFT_EXPR
                         :             :    or PLUS_EXPR no statement is emitted and OP1 itself is returned.
                         :             :    Unlike the callers of synth_lshift_by_additions elsewhere, this helper
                         :             :    also appends the last statement of a synthesized shift.  */
    4242                 :             : 
    4243                 :             : static tree
    4244                 :       46595 : apply_binop_and_append_stmt (vec_info *vinfo,
    4245                 :             :                              tree_code code, tree op1, tree op2,
    4246                 :             :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    4247                 :             : {
    4248                 :       46595 :   if (integer_zerop (op2)
    4249                 :       46595 :       && (code == LSHIFT_EXPR
    4250                 :       40840 :           || code == PLUS_EXPR))
    4251                 :             :     {
    4252                 :       40840 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4253                 :             :       return op1;  /* Shifting by or adding zero leaves OP1 unchanged.  */
    4254                 :             :     }
    4255                 :             : 
    4256                 :        5755 :   gimple *stmt;
    4257                 :        5755 :   tree itype = TREE_TYPE (op1);
    4258                 :        5755 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4259                 :             : 
    4260                 :        5755 :   if (code == LSHIFT_EXPR
    4261                 :        5755 :       && synth_shift_p)
    4262                 :             :     {
    4263                 :           0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4264                 :           0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
    4265                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4266                 :           0 :       return tmp_var;
    4267                 :             :     }
    4268                 :             : 
    4269                 :        5755 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4270                 :        5755 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4271                 :        5755 :   return tmp_var;
    4272                 :             : }
    4273                 :             : 
    4274                 :             : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4275                 :             :    and simple arithmetic operations to be vectorized.  Record the statements
    4276                 :             :    produced in STMT_VINFO and return the last statement in the sequence or
    4277                 :             :    NULL if it is not possible to synthesize such a multiplication.
    4278                 :             :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4279                 :             :    works on tree-ssa form.
                         :             :
                         :             :    NULL is returned when VAL does not fit a signed HOST_WIDE_INT, when
                         :             :    there is no vector type for the type the arithmetic is carried out in,
                         :             :    when choose_mult_variant finds no algorithm, or when
                         :             :    target_supports_mult_synth_alg rejects the chosen one.
                         :             :
                         :             :    The returned statement is not appended to the pattern definition
                         :             :    sequence; every statement built before it is.  */
    4280                 :             : 
    4281                 :             : static gimple *
    4282                 :      261073 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4283                 :             :                              stmt_vec_info stmt_vinfo)
    4284                 :             : {
    4285                 :      261073 :   tree itype = TREE_TYPE (op);
    4286                 :      261073 :   machine_mode mode = TYPE_MODE (itype);
    4287                 :      261073 :   struct algorithm alg;
    4288                 :      261073 :   mult_variant variant;
    4289                 :      261073 :   if (!tree_fits_shwi_p (val))
    4290                 :             :     return NULL;
    4291                 :             : 
    4292                 :             :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4293                 :             :      signed overflow where the original operation didn't.  Perform the
    4294                 :             :      operations on an unsigned type and cast back to avoid this.
    4295                 :             :      In the future we may want to relax this for synthesis algorithms
    4296                 :             :      that we can prove do not cause unexpected overflow.  */
    4297                 :      258399 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4298                 :             : 
    4299                 :       54270 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4300                 :      258399 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4301                 :      258399 :   if (!vectype)
    4302                 :             :     return NULL;
    4303                 :             : 
    4304                 :             :   /* Targets that don't support vector shifts but support vector additions
    4305                 :             :      can synthesize shifts that way.  */
    4306                 :      258399 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4307                 :             : 
    4308                 :      258399 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4309                 :             :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4310                 :             :      The vectorizer's benefit analysis will decide whether it's beneficial
    4311                 :             :      to do this.  The mode of VECTYPE is passed when it is a vector mode,
                         :             :      otherwise the scalar mode of ITYPE is used.  */
    4312                 :      516798 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4313                 :      258399 :                                        ? TYPE_MODE (vectype) : mode,
    4314                 :             :                                        hwval, &alg, &variant, MAX_COST);
    4315                 :      258399 :   if (!possible)
    4316                 :             :     return NULL;
    4317                 :             : 
    4318                 :      258399 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4319                 :             :     return NULL;
    4320                 :             : 
    4321                 :       31704 :   tree accumulator;
    4322                 :             : 
    4323                 :             :   /* STMT tracks the most recently built statement; the final one is
                         :             :      returned to the caller instead of being appended.  */
    4324                 :       31704 :   gimple *stmt = NULL;
    4325                 :             : 
    4326                 :       31704 :   if (cast_to_unsigned_p)
    4327                 :             :     {
    4328                 :       11942 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4329                 :       11942 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4330                 :       11942 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4331                 :       11942 :       op = tmp_op;  /* From here on OP is the unsigned copy.  */
    4332                 :             :     }
    4333                 :             : 
    4334                 :       31704 :   if (alg.op[0] == alg_zero)
    4335                 :         177 :     accumulator = build_int_cst (multtype, 0);
    4336                 :             :   else
    4337                 :             :     accumulator = op;
    4338                 :             : 
    4339                 :       31704 :   bool needs_fixup = (variant == negate_variant)
    4340                 :       31704 :                       || (variant == add_variant);
    4341                 :             : 
    4342                 :      141693 :   for (int i = 1; i < alg.ops; i++)
    4343                 :             :     {
    4344                 :      109989 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4345                 :      109989 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4346                 :      109989 :       tree tmp_var = NULL_TREE;
    4347                 :             : 
    4348                 :      109989 :       switch (alg.op[i])
    4349                 :             :         {
    4350                 :       63394 :         case alg_shift:  /* accum = accum << log  */
    4351                 :       63394 :           if (synth_shift_p)
    4352                 :           0 :             stmt
    4353                 :           0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4354                 :           0 :                                            alg.log[i], stmt_vinfo);
    4355                 :             :           else
    4356                 :       63394 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4357                 :             :                                          shft_log);
    4358                 :             :           break;
    4359                 :       23752 :         case alg_add_t_m2:  /* accum = accum + (op << log)  */
    4360                 :       23752 :           tmp_var
    4361                 :       23752 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4362                 :             :                                            stmt_vinfo, synth_shift_p);
    4363                 :       23752 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4364                 :             :                                        tmp_var);
    4365                 :       23752 :           break;
    4366                 :       17242 :         case alg_sub_t_m2:  /* accum = accum - (op << log)  */
    4367                 :       17242 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4368                 :             :                                                  shft_log, stmt_vinfo,
    4369                 :             :                                                  synth_shift_p);
    4370                 :             :           /* In some algorithms the first step involves zeroing the
    4371                 :             :              accumulator.  If subtracting from such an accumulator
    4372                 :             :              just emit the negation directly.  */
    4373                 :       17242 :           if (integer_zerop (accumulator))
    4374                 :         177 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4375                 :             :           else
    4376                 :       17065 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4377                 :             :                                         tmp_var);
    4378                 :             :           break;
    4379                 :           0 :         case alg_add_t2_m:  /* accum = (accum << log) + op  */
    4380                 :           0 :           tmp_var
    4381                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4382                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4383                 :           0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4384                 :           0 :           break;
    4385                 :           0 :         case alg_sub_t2_m:  /* accum = (accum << log) - op  */
    4386                 :           0 :           tmp_var
    4387                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4388                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4389                 :           0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4390                 :           0 :           break;
    4391                 :        4881 :         case alg_add_factor:  /* accum = accum + (accum << log)  */
    4392                 :        4881 :           tmp_var
    4393                 :        4881 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4394                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4395                 :        4881 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4396                 :             :                                        tmp_var);
    4397                 :        4881 :           break;
    4398                 :         720 :         case alg_sub_factor:  /* accum = (accum << log) - accum  */
    4399                 :         720 :           tmp_var
    4400                 :         720 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4401                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4402                 :         720 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4403                 :             :                                       accumulator);
    4404                 :         720 :           break;
    4405                 :           0 :         default:
    4406                 :           0 :           gcc_unreachable ();
    4407                 :             :         }
    4408                 :             :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4409                 :             :          but rather return it directly.  The statement of the last iteration
                         :             :          is only the final one when neither a variant fixup nor a conversion
                         :             :          back to ITYPE follows; otherwise it is appended as well.  */
    4410                 :             : 
    4411                 :      109989 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4412                 :       90474 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4413                 :      109989 :       accumulator = accum_tmp;
    4414                 :             :     }
    4415                 :       31704 :   if (variant == negate_variant)
    4416                 :             :     {
    4417                 :         361 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4418                 :         361 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4419                 :         361 :       accumulator = accum_tmp;
    4420                 :         361 :       if (cast_to_unsigned_p)  /* A conversion follows, so append.  */
    4421                 :         124 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4422                 :             :     }
    4423                 :       31343 :   else if (variant == add_variant)
    4424                 :             :     {
    4425                 :          68 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4426                 :          68 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4427                 :          68 :       accumulator = accum_tmp;
    4428                 :          68 :       if (cast_to_unsigned_p)  /* A conversion follows, so append.  */
    4429                 :          58 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4430                 :             :     }
    4431                 :             :   /* Convert the result back to the original type ITYPE if the arithmetic
                         :             :      was carried out on its unsigned counterpart.  */
    4432                 :       31457 :   if (cast_to_unsigned_p)
    4433                 :             :     {
    4434                 :       11942 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4435                 :       11942 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4436                 :             :     }
    4437                 :             : 
    4438                 :             :   return stmt;
    4439                 :             : }
    4440                 :             : 
    4441                 :             : /* Detect multiplication by constant and convert it into a sequence of
    4442                 :             :    shifts and additions, subtractions, negations.  We reuse the
    4443                 :             :    choose_mult_variant algorithms from expmed.cc.
    4444                 :             : 
    4445                 :             :    Input:
    4446                 :             : 
    4447                 :             :    * STMT_VINFO: The stmt from which the pattern search begins,
    4448                 :             :      i.e. the mult stmt.
    4449                 :             : 
    4450                 :             :    Output:
    4451                 :             : 
    4452                 :             :    * TYPE_OUT: The type of the output of this pattern.
    4453                 :             : 
    4454                 :             :    * Return value: A new stmt that will be used to replace
    4455                 :             :      the multiplication, or NULL when the pattern does not apply.
                         :             :
                         :             :    The pattern applies only to a MULT_EXPR assignment whose first operand
                         :             :    is an SSA_NAME of an integral type with mode precision and whose second
                         :             :    operand is an INTEGER_CST, when a vector type exists for that type and
                         :             :    the target cannot implement the vector multiplication itself.  */
    4456                 :             : 
    4457                 :             : static gimple *
    4458                 :    29964791 : vect_recog_mult_pattern (vec_info *vinfo,
    4459                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4460                 :             : {
    4461                 :    29964791 :   gimple *last_stmt = stmt_vinfo->stmt;
    4462                 :    29964791 :   tree oprnd0, oprnd1, vectype, itype;
    4463                 :    29964791 :   gimple *pattern_stmt;
    4464                 :             : 
    4465                 :    29964791 :   if (!is_gimple_assign (last_stmt))
    4466                 :             :     return NULL;
    4467                 :             : 
    4468                 :    20535487 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4469                 :             :     return NULL;
    4470                 :             : 
    4471                 :     1317377 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4472                 :     1317377 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4473                 :     1317377 :   itype = TREE_TYPE (oprnd0);
    4474                 :             : 
    4475                 :     1317377 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4476                 :     1317314 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4477                 :      808354 :       || !INTEGRAL_TYPE_P (itype)
    4478                 :     2125731 :       || !type_has_mode_precision_p (itype))
    4479                 :      509075 :     return NULL;
    4480                 :             : 
    4481                 :      808302 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4482                 :      808302 :   if (vectype == NULL_TREE)
    4483                 :             :     return NULL;
    4484                 :             : 
    4485                 :             :   /* If the target can handle vectorized multiplication natively,
    4486                 :             :      don't attempt to optimize this.  */
    4487                 :      657127 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4488                 :      657127 :   if (mul_optab != unknown_optab
    4489                 :      657127 :       && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    4490                 :             :     return NULL;
    4491                 :             : 
    4492                 :      261073 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4493                 :             :                                               oprnd0, oprnd1, stmt_vinfo);
    4494                 :      261073 :   if (!pattern_stmt)
    4495                 :             :     return NULL;  /* No synthesis possible for this constant or target.  */
    4496                 :             : 
    4497                 :             :   /* Pattern detected.  */
    4498                 :       31704 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4499                 :             : 
    4500                 :       31704 :   *type_out = vectype;
    4501                 :             : 
    4502                 :       31704 :   return pattern_stmt;
    4503                 :             : }
    4504                 :             : /* Saturating add, sub and trunc matchers that are defined outside this
                         :             :    file.  In the use visible in vect_recog_sat_add_pattern the first
                         :             :    argument is the lhs of the statement being examined, the second is an
                         :             :    array that holds the matched operands when true is returned, and the
                         :             :    third (a callback) is passed as NULL.  Unsigned variants come first.  */
    4505                 :             : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
    4506                 :             : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
    4507                 :             : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4508                 :             : /* Signed counterparts of the matchers above.  */
    4509                 :             : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
    4510                 :             : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
    4511                 :             : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4512                 :             : /* Build the pattern statement for the binary internal function FN applied
                         :             :    to OP_0 and OP_1, where LHS is the result of the scalar statement of
                         :             :    STMT_INFO that is being replaced.
                         :             :
                         :             :    Return NULL if there is no vector type for the type of OP_0 or for the
                         :             :    type of LHS, or if direct_internal_fn_supported_p rejects FN for the
                         :             :    vector type of OP_0.  Otherwise set *TYPE_OUT to the vector type of LHS.
                         :             :    When the two scalar types are compatible the call itself is returned.
                         :             :    If they are not, the call is appended to the pattern definition
                         :             :    sequence with the input vector type and a NOP_EXPR conversion of its
                         :             :    result to the type of LHS is returned instead.  */
    4513                 :             : static gimple *
    4514                 :         257 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
    4515                 :             :                                      internal_fn fn, tree *type_out,
    4516                 :             :                                      tree lhs, tree op_0, tree op_1)
    4517                 :             : {
    4518                 :         257 :   tree itype = TREE_TYPE (op_0);
    4519                 :         257 :   tree otype = TREE_TYPE (lhs);
    4520                 :         257 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4521                 :         257 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4522                 :             : 
    4523                 :         257 :   if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4524                 :         257 :     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    4525                 :             :     {
    4526                 :          61 :       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
    4527                 :          61 :       tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
    4528                 :             : 
    4529                 :          61 :       gimple_call_set_lhs (call, in_ssa);
    4530                 :          61 :       gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4531                 :          61 :       gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
    4532                 :             : 
    4533                 :          61 :       *type_out = v_otype;
    4534                 :             : 
    4535                 :          61 :       if (types_compatible_p (itype, otype))
    4536                 :             :         return call;  /* No conversion needed.  */
    4537                 :             :       else
    4538                 :             :         {
    4539                 :           0 :           append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
    4540                 :           0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4541                 :             : 
    4542                 :           0 :           return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
    4543                 :             :         }
    4544                 :             :     }
    4545                 :             : 
    4546                 :             :   return NULL;
    4547                 :             : }
    4548                 :             : 
    4549                 :             : /*
    4550                 :             :  * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
    4551                 :             :  *   _7 = _4 + _6;
    4552                 :             :  *   _8 = _4 > _7;
    4553                 :             :  *   _9 = (long unsigned int) _8;
    4554                 :             :  *   _10 = -_9;
    4555                 :             :  *   _12 = _7 | _10;
    4556                 :             :  *
    4557                 :             :  * And then simplified to
    4558                 :             :  *   _12 = .SAT_ADD (_4, _6);
                         :             :  *
                         :             :  * The unsigned matcher is tried first, then the signed one.  An
                         :             :  * INTEGER_CST second operand is converted to the type of the first.
                         :             :  * The replacement is built by vect_recog_build_binary_gimple_stmt with
                         :             :  * IFN_SAT_ADD, which also sets TYPE_OUT; NULL is returned when the stmt
                         :             :  * is not an assignment, nothing matches or no replacement can be built.
    4559                 :             :  */
    4560                 :             : 
    4561                 :             : static gimple *
    4562                 :    30045813 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4563                 :             :                             tree *type_out)
    4564                 :             : {
    4565                 :    30045813 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4566                 :             : 
    4567                 :    30045813 :   if (!is_gimple_assign (last_stmt))
    4568                 :             :     return NULL;
    4569                 :             : 
    4570                 :    20616509 :   tree ops[2];  /* Filled in by a successful match.  */
    4571                 :    20616509 :   tree lhs = gimple_assign_lhs (last_stmt);
    4572                 :             : 
    4573                 :    20616509 :   if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
    4574                 :    20616509 :       || gimple_signed_integer_sat_add (lhs, ops, NULL))
    4575                 :             :     {
    4576                 :          50 :       if (TREE_CODE (ops[1]) == INTEGER_CST)
    4577                 :          12 :         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
    4578                 :             : 
    4579                 :          50 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4580                 :             :                                                           IFN_SAT_ADD, type_out,
    4581                 :             :                                                           lhs, ops[0], ops[1]);
    4582                 :          50 :       if (stmt)
    4583                 :             :         {
    4584                 :          32 :           vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
    4585                 :          32 :           return stmt;
    4586                 :             :         }
    4587                 :             :     }
    4588                 :             : 
    4589                 :             :   return NULL;
    4590                 :             : }
    4591                 :             : 
    4592                 :             : /*
    4593                 :             :  * Try to transform the truncation for the .SAT_SUB pattern, which mostly
    4594                 :             :  * occurs in the benchmark zip.  Aka:
    4595                 :             :  *
    4596                 :             :  *   unsigned int _1;
    4597                 :             :  *   unsigned int _2;
    4598                 :             :  *   unsigned short int _4;
    4599                 :             :  *   _9 = (unsigned short int).SAT_SUB (_1, _2);
    4600                 :             :  *
    4601                 :             :  *   if _1 is known to be in the range of unsigned short int.  For example
    4602                 :             :  *   there is a def _1 = (unsigned int) _4.  Then we can transform the
    4603                 :             :  *   truncation to:
    4604                 :             :  *
    4605                 :             :  *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
    4606                 :             :  *   _9 = .SAT_SUB (_4, _3);
    4607                 :             :  *
    4608                 :             :  *   Then we can generate better vectorized code and avoid the unnecessary
    4609                 :             :  *   narrowing stmt during vectorization, using the stmt(s) below.
    4610                 :             :  *
    4611                 :             :  *   _3 = .SAT_TRUNC(_2); // SI => HI
    4612                 :             :  *   _9 = .SAT_SUB (_4, _3);
    4613                 :             :  */
    4614                 :             : static void
    4615                 :         207 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
    4616                 :             :                                       stmt_vec_info stmt_vinfo,
    4617                 :             :                                       tree lhs, tree *ops)
    4618                 :             : {
    4619                 :         207 :   tree otype = TREE_TYPE (lhs);
    4620                 :         207 :   tree itype = TREE_TYPE (ops[0]);
    4621                 :         207 :   unsigned itype_prec = TYPE_PRECISION (itype);
    4622                 :         207 :   unsigned otype_prec = TYPE_PRECISION (otype);
    4623                 :             : 
    4624                 :         207 :   if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
    4625                 :         207 :     return;
    4626                 :             : 
    4627                 :           0 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4628                 :           0 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4629                 :           0 :   tree_pair v_pair = tree_pair (v_otype, v_itype);
    4630                 :             : 
    4631                 :           0 :   if (v_otype == NULL_TREE || v_itype == NULL_TREE
    4632                 :           0 :     || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
    4633                 :             :                                         OPTIMIZE_FOR_BOTH))
    4634                 :           0 :     return;
    4635                 :             : 
    4636                 :             :   /* 1. Find the _4 and update ops[0] as above example.  */
    4637                 :           0 :   vect_unpromoted_value unprom;
    4638                 :           0 :   tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
    4639                 :             : 
    4640                 :           0 :   if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
    4641                 :             :     return;
    4642                 :             : 
    4643                 :           0 :   ops[0] = tmp;
    4644                 :             : 
    4645                 :             :   /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
    4646                 :           0 :   tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4647                 :           0 :   gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
    4648                 :             : 
    4649                 :           0 :   gimple_call_set_lhs (call, trunc_lhs_ssa);
    4650                 :           0 :   gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4651                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
    4652                 :             : 
    4653                 :           0 :   ops[1] = trunc_lhs_ssa;
    4654                 :             : }
    4655                 :             : 
    4656                 :             : /*
    4657                 :             :  * Try to detect saturation sub pattern (SAT_SUB), aka below gimple:
    4658                 :             :  * Unsigned:
    4659                 :             :  *   _7 = _1 >= _2;
    4660                 :             :  *   _8 = _1 - _2;
    4661                 :             :  *   _10 = (long unsigned int) _7;
    4662                 :             :  *   _9 = _8 * _10;
    4663                 :             :  *
    4664                 :             :  * And then simplified to
    4665                 :             :  *   _9 = .SAT_SUB (_1, _2);
    4666                 :             :  *
    4667                 :             :  * Signed:
    4668                 :             :  *   x.0_4 = (unsigned char) x_16;
    4669                 :             :  *   y.1_5 = (unsigned char) y_18;
    4670                 :             :  *   _6 = x.0_4 - y.1_5;
    4671                 :             :  *   minus_19 = (int8_t) _6;
    4672                 :             :  *   _7 = x_16 ^ y_18;
    4673                 :             :  *   _8 = x_16 ^ minus_19;
    4674                 :             :  *   _44 = _7 < 0;
    4675                 :             :  *   _23 = x_16 < 0;
    4676                 :             :  *   _24 = (signed char) _23;
    4677                 :             :  *   _58 = (unsigned char) _24;
    4678                 :             :  *   _59 = -_58;
    4679                 :             :  *   _25 = (signed char) _59;
    4680                 :             :  *   _26 = _25 ^ 127;
    4681                 :             :  *   _42 = _8 < 0;
    4682                 :             :  *   _41 = _42 & _44;
    4683                 :             :  *   iftmp.2_11 = _41 ? _26 : minus_19;
    4684                 :             :  *
    4685                 :             :  * And then simplified to
    4686                 :             :  *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
    4687                 :             :  */
    4688                 :             : 
    4689                 :             : static gimple *
    4690                 :    30045781 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4691                 :             :                             tree *type_out)
    4692                 :             : {
    4693                 :    30045781 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4694                 :             : 
    4695                 :    30045781 :   if (!is_gimple_assign (last_stmt))
    4696                 :             :     return NULL;
    4697                 :             : 
    4698                 :    20616477 :   tree ops[2];
    4699                 :    20616477 :   tree lhs = gimple_assign_lhs (last_stmt);
    4700                 :             : 
    4701                 :    20616477 :   if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
    4702                 :    20616477 :       || gimple_signed_integer_sat_sub (lhs, ops, NULL))
    4703                 :             :     {
    4704                 :         207 :       vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
    4705                 :         207 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4706                 :             :                                                           IFN_SAT_SUB, type_out,
    4707                 :             :                                                           lhs, ops[0], ops[1]);
    4708                 :         207 :       if (stmt)
    4709                 :             :         {
    4710                 :          29 :           vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
    4711                 :          29 :           return stmt;
    4712                 :             :         }
    4713                 :             :     }
    4714                 :             : 
    4715                 :             :   return NULL;
    4716                 :             : }
    4717                 :             : 
    4718                 :             : /*
    4719                 :             :  * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
    4720                 :             :  *   overflow_5 = x_4(D) > 4294967295;
    4721                 :             :  *   _1 = (unsigned int) x_4(D);
    4722                 :             :  *   _2 = (unsigned int) overflow_5;
    4723                 :             :  *   _3 = -_2;
    4724                 :             :  *   _6 = _1 | _3;
    4725                 :             :  *
    4726                 :             :  * And then simplified to
    4727                 :             :  *   _6 = .SAT_TRUNC (x_4(D));
    4728                 :             :  */
    4729                 :             : 
    4730                 :             : static gimple *
    4731                 :    30045752 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4732                 :             :                               tree *type_out)
    4733                 :             : {
    4734                 :    30045752 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4735                 :             : 
    4736                 :    30045752 :   if (!is_gimple_assign (last_stmt))
    4737                 :             :     return NULL;
    4738                 :             : 
    4739                 :    20616448 :   tree ops[1];
    4740                 :    20616448 :   tree lhs = gimple_assign_lhs (last_stmt);
    4741                 :    20616448 :   tree otype = TREE_TYPE (lhs);
    4742                 :             : 
    4743                 :    20616448 :   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    4744                 :    20616177 :        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
    4745                 :    20616448 :       && type_has_mode_precision_p (otype))
    4746                 :             :     {
    4747                 :         259 :       tree itype = TREE_TYPE (ops[0]);
    4748                 :         259 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4749                 :         259 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4750                 :         259 :       internal_fn fn = IFN_SAT_TRUNC;
    4751                 :             : 
    4752                 :         253 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4753                 :         512 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4754                 :             :                                            OPTIMIZE_FOR_BOTH))
    4755                 :             :         {
    4756                 :           0 :           gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
    4757                 :           0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4758                 :             : 
    4759                 :           0 :           gimple_call_set_lhs (call, out_ssa);
    4760                 :           0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4761                 :           0 :           gimple_set_location (call, gimple_location (last_stmt));
    4762                 :             : 
    4763                 :           0 :           *type_out = v_otype;
    4764                 :             : 
    4765                 :           0 :           return call;
    4766                 :             :         }
    4767                 :             :     }
    4768                 :             : 
    4769                 :             :   return NULL;
    4770                 :             : }
    4771                 :             : 
    4772                 :             : /* Detect a signed division by a constant that wouldn't be
    4773                 :             :    otherwise vectorized:
    4774                 :             : 
    4775                 :             :    type a_t, b_t;
    4776                 :             : 
    4777                 :             :    S1 a_t = b_t / N;
    4778                 :             : 
    4779                 :             :   where type 'type' is an integral type and N is a constant.
    4780                 :             : 
    4781                 :             :   Similarly handle modulo by a constant:
    4782                 :             : 
    4783                 :             :    S4 a_t = b_t % N;
    4784                 :             : 
    4785                 :             :   Input/Output:
    4786                 :             : 
    4787                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4788                 :             :     i.e. the division stmt.  If N is a power of two constant
    4789                 :             :     and type is signed, S1 is replaced by:
    4790                 :             :   S3  y_t = b_t < 0 ? N - 1 : 0;
    4791                 :             :   S2  x_t = b_t + y_t;
    4792                 :             :   S1' a_t = x_t >> log2 (N);
    4793                 :             : 
    4794                 :             :     S4 is replaced if N is a power of two constant and
    4795                 :             :     type is signed by (where *_T temporaries have unsigned type):
    4796                 :             :   S9  y_T = b_t < 0 ? -1U : 0U;
    4797                 :             :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    4798                 :             :   S7  z_t = (type) z_T;
    4799                 :             :   S6  w_t = b_t + z_t;
    4800                 :             :   S5  x_t = w_t & (N - 1);
    4801                 :             :   S4' a_t = x_t - z_t;
    4802                 :             : 
    4803                 :             :   Output:
    4804                 :             : 
    4805                 :             :   * TYPE_OUT: The type of the output of this pattern.
    4806                 :             : 
    4807                 :             :   * Return value: A new stmt that will be used to replace the division
    4808                 :             :     S1 or modulo S4 stmt.  */
    4809                 :             : 
    4810                 :             : static gimple *
    4811                 :    29780417 : vect_recog_divmod_pattern (vec_info *vinfo,
    4812                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    4813                 :             : {
    4814                 :    29780417 :   gimple *last_stmt = stmt_vinfo->stmt;
    4815                 :    29780417 :   tree oprnd0, oprnd1, vectype, itype, cond;
    4816                 :    29780417 :   gimple *pattern_stmt, *def_stmt;
    4817                 :    29780417 :   enum tree_code rhs_code;
    4818                 :    29780417 :   optab optab;
    4819                 :    29780417 :   tree q, cst;
    4820                 :    29780417 :   int prec;
    4821                 :             : 
    4822                 :    29780417 :   if (!is_gimple_assign (last_stmt))
    4823                 :             :     return NULL;
    4824                 :             : 
    4825                 :    20351113 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4826                 :    20351113 :   switch (rhs_code)
    4827                 :             :     {
    4828                 :      289519 :     case TRUNC_DIV_EXPR:
    4829                 :      289519 :     case EXACT_DIV_EXPR:
    4830                 :      289519 :     case TRUNC_MOD_EXPR:
    4831                 :      289519 :       break;
    4832                 :             :     default:
    4833                 :             :       return NULL;
    4834                 :             :     }
    4835                 :             : 
    4836                 :      289519 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4837                 :      289519 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4838                 :      289519 :   itype = TREE_TYPE (oprnd0);
    4839                 :      289519 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4840                 :      272392 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4841                 :      183246 :       || TREE_CODE (itype) != INTEGER_TYPE
    4842                 :      472765 :       || !type_has_mode_precision_p (itype))
    4843                 :      106273 :     return NULL;
    4844                 :             : 
    4845                 :      183246 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    4846                 :      183246 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4847                 :      183246 :   if (vectype == NULL_TREE)
    4848                 :             :     return NULL;
    4849                 :             : 
    4850                 :      148284 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    4851                 :             :     {
    4852                 :             :       /* If the target can handle vectorized division or modulo natively,
    4853                 :             :          don't attempt to optimize this, since native division is likely
    4854                 :             :          to give smaller code.  */
    4855                 :        1502 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    4856                 :        1502 :       if (optab != unknown_optab
    4857                 :        1502 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    4858                 :             :         return NULL;
    4859                 :             :     }
    4860                 :             : 
    4861                 :      148284 :   prec = TYPE_PRECISION (itype);
    4862                 :      148284 :   if (integer_pow2p (oprnd1))
    4863                 :             :     {
    4864                 :       72302 :       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
    4865                 :         152 :         return NULL;
    4866                 :             : 
    4867                 :             :       /* Pattern detected.  */
    4868                 :       72150 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    4869                 :             : 
    4870                 :       72150 :       *type_out = vectype;
    4871                 :             : 
    4872                 :             :       /* Check if the target supports this internal function.  */
    4873                 :       72150 :       internal_fn ifn = IFN_DIV_POW2;
    4874                 :       72150 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    4875                 :             :         {
    4876                 :           0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    4877                 :             : 
    4878                 :           0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    4879                 :           0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    4880                 :           0 :           gimple_call_set_lhs (div_stmt, var_div);
    4881                 :             : 
    4882                 :           0 :           if (rhs_code == TRUNC_MOD_EXPR)
    4883                 :             :             {
    4884                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    4885                 :           0 :               def_stmt
    4886                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4887                 :             :                                        LSHIFT_EXPR, var_div, shift);
    4888                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4889                 :           0 :               pattern_stmt
    4890                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4891                 :             :                                        MINUS_EXPR, oprnd0,
    4892                 :             :                                        gimple_assign_lhs (def_stmt));
    4893                 :             :             }
    4894                 :             :           else
    4895                 :             :             pattern_stmt = div_stmt;
    4896                 :           0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    4897                 :             : 
    4898                 :           0 :           return pattern_stmt;
    4899                 :             :         }
    4900                 :             : 
    4901                 :       72150 :       cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    4902                 :       72150 :       def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
    4903                 :             :                                       build_int_cst (itype, 0));
    4904                 :       72150 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
    4905                 :             :                               truth_type_for (vectype), itype);
    4906                 :       72150 :       if (rhs_code == TRUNC_DIV_EXPR
    4907                 :       72150 :           || rhs_code == EXACT_DIV_EXPR)
    4908                 :             :         {
    4909                 :       69341 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    4910                 :       69341 :           tree shift;
    4911                 :       69341 :           def_stmt
    4912                 :       69341 :             = gimple_build_assign (var, COND_EXPR, cond,
    4913                 :             :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4914                 :             :                                                 build_int_cst (itype, 1)),
    4915                 :             :                                    build_int_cst (itype, 0));
    4916                 :       69341 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4917                 :       69341 :           var = vect_recog_temp_ssa_var (itype, NULL);
    4918                 :       69341 :           def_stmt
    4919                 :       69341 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    4920                 :             :                                    gimple_assign_lhs (def_stmt));
    4921                 :       69341 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4922                 :             : 
    4923                 :       69341 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    4924                 :       69341 :           pattern_stmt
    4925                 :       69341 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4926                 :             :                                    RSHIFT_EXPR, var, shift);
    4927                 :             :         }
    4928                 :             :       else
    4929                 :             :         {
    4930                 :        2809 :           tree signmask;
    4931                 :        2809 :           if (compare_tree_int (oprnd1, 2) == 0)
    4932                 :             :             {
    4933                 :        1558 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4934                 :        1558 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    4935                 :             :                                               build_int_cst (itype, 1),
    4936                 :             :                                               build_int_cst (itype, 0));
    4937                 :        1558 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4938                 :             :             }
    4939                 :             :           else
    4940                 :             :             {
    4941                 :        1251 :               tree utype
    4942                 :        1251 :                 = build_nonstandard_integer_type (prec, 1);
    4943                 :        1251 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    4944                 :        1251 :               tree shift
    4945                 :        1251 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    4946                 :        1251 :                                         - tree_log2 (oprnd1));
    4947                 :        1251 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    4948                 :             : 
    4949                 :        1251 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    4950                 :             :                                               build_int_cst (utype, -1),
    4951                 :             :                                               build_int_cst (utype, 0));
    4952                 :        1251 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4953                 :        1251 :               var = vect_recog_temp_ssa_var (utype, NULL);
    4954                 :        1251 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    4955                 :             :                                               gimple_assign_lhs (def_stmt),
    4956                 :             :                                               shift);
    4957                 :        1251 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4958                 :        1251 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4959                 :        1251 :               def_stmt
    4960                 :        1251 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    4961                 :        1251 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4962                 :             :             }
    4963                 :        2809 :           def_stmt
    4964                 :        2809 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4965                 :             :                                    PLUS_EXPR, oprnd0, signmask);
    4966                 :        2809 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4967                 :        2809 :           def_stmt
    4968                 :        2809 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4969                 :             :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    4970                 :             :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4971                 :             :                                                 build_int_cst (itype, 1)));
    4972                 :        2809 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4973                 :             : 
    4974                 :        2809 :           pattern_stmt
    4975                 :        2809 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4976                 :             :                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
    4977                 :             :                                    signmask);
    4978                 :             :         }
    4979                 :             : 
    4980                 :       72150 :       return pattern_stmt;
    4981                 :             :     }
    4982                 :             : 
    4983                 :       75982 :   if ((cst = uniform_integer_cst_p (oprnd1))
    4984                 :       75982 :       && TYPE_UNSIGNED (itype)
    4985                 :             :       && rhs_code == TRUNC_DIV_EXPR
    4986                 :       47057 :       && vectype
    4987                 :      106162 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    4988                 :             :     {
    4989                 :             :       /* We can use the relationship:
    4990                 :             : 
    4991                 :             :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    4992                 :             : 
    4993                 :             :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    4994                 :             :          is therefore a shift right.  When operating in modes that are
    4995                 :             :          multiples of a byte in size, there are two cases:
    4996                 :             : 
    4997                 :             :          (1) N(N+3) is not representable, in which case the question
    4998                 :             :              becomes whether the replacement expression overflows.
    4999                 :             :              It is enough to test that x+N+2 does not overflow,
    5000                 :             :              i.e. that x < MAX-(N+1).
    5001                 :             : 
    5002                 :             :          (2) N(N+3) is representable, in which case it is the (only)
    5003                 :             :              bound that we need to check.
    5004                 :             : 
    5005                 :             :          ??? For now we just handle the case where // (N+1) is a shift
    5006                 :             :          right by half the precision, since some architectures can
    5007                 :             :          optimize the associated addition and shift combinations
    5008                 :             :          into single instructions.  */
    5009                 :             : 
    5010                 :       19083 :       auto wcst = wi::to_wide (cst);
    5011                 :       19083 :       int pow = wi::exact_log2 (wcst + 1);
    5012                 :       19083 :       if (pow == prec / 2)
    5013                 :             :         {
    5014                 :         572 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    5015                 :             : 
    5016                 :         572 :           gimple_ranger ranger;
    5017                 :         572 :           int_range_max r;
    5018                 :             : 
    5019                 :             :           /* Check that no overflow will occur.  If we don't have range
    5020                 :             :              information we can't perform the optimization.  */
    5021                 :             : 
    5022                 :         572 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    5023                 :             :             {
    5024                 :         570 :               wide_int max = r.upper_bound ();
    5025                 :         570 :               wide_int one = wi::shwi (1, prec);
    5026                 :         570 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    5027                 :         570 :               wi::overflow_type ovf;
    5028                 :         570 :               wi::add (max, adder, UNSIGNED, &ovf);
    5029                 :         570 :               if (ovf == wi::OVF_NONE)
    5030                 :             :                 {
    5031                 :         333 :                   *type_out = vectype;
    5032                 :         333 :                   tree tadder = wide_int_to_tree (itype, adder);
    5033                 :         333 :                   tree rshift = wide_int_to_tree (itype, pow);
    5034                 :             : 
    5035                 :         333 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    5036                 :         333 :                   gassign *patt1
    5037                 :         333 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    5038                 :         333 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5039                 :             : 
    5040                 :         333 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    5041                 :         333 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    5042                 :             :                                                rshift);
    5043                 :         333 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5044                 :             : 
    5045                 :         333 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    5046                 :         333 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    5047                 :             :                                                oprnd0);
    5048                 :         333 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5049                 :             : 
    5050                 :         333 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    5051                 :         333 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    5052                 :             :                                                       new_lhs3, rshift);
    5053                 :             : 
    5054                 :         333 :                   return pattern_stmt;
    5055                 :             :                 }
    5056                 :         570 :             }
    5057                 :         572 :         }
    5058                 :             :     }
    5059                 :             : 
    5060                 :       75649 :   if (prec > HOST_BITS_PER_WIDE_INT
    5061                 :       75649 :       || integer_zerop (oprnd1))
    5062                 :         572 :     return NULL;
    5063                 :             : 
    5064                 :       75077 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    5065                 :             :     return NULL;
    5066                 :             : 
    5067                 :       22385 :   if (TYPE_UNSIGNED (itype))
    5068                 :             :     {
    5069                 :       15724 :       unsigned HOST_WIDE_INT mh, ml;
    5070                 :       15724 :       int pre_shift, post_shift;
    5071                 :       15724 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    5072                 :       15724 :                                   & GET_MODE_MASK (itype_mode));
    5073                 :       15724 :       tree t1, t2, t3, t4;
    5074                 :             : 
    5075                 :       15724 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    5076                 :             :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    5077                 :          23 :         return NULL;
    5078                 :             : 
    5079                 :             :       /* Find a suitable multiplier and right shift count instead of
    5080                 :             :          directly dividing by D.  */
    5081                 :       15701 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
    5082                 :             : 
    5083                 :             :       /* If the suggested multiplier is more than PREC bits, we can do better
    5084                 :             :          for even divisors, using an initial right shift.  */
    5085                 :       15701 :       if (mh != 0 && (d & 1) == 0)
    5086                 :             :         {
    5087                 :         448 :           pre_shift = ctz_or_zero (d);
    5088                 :         448 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    5089                 :             :                                   &ml, &post_shift);
    5090                 :         448 :           gcc_assert (!mh);
    5091                 :             :         }
    5092                 :             :       else
    5093                 :             :         pre_shift = 0;
    5094                 :             : 
    5095                 :         932 :       if (mh != 0)
    5096                 :             :         {
    5097                 :         932 :           if (post_shift - 1 >= prec)
    5098                 :             :             return NULL;
    5099                 :             : 
    5100                 :             :           /* t1 = oprnd0 h* ml;
    5101                 :             :              t2 = oprnd0 - t1;
    5102                 :             :              t3 = t2 >> 1;
    5103                 :             :              t4 = t1 + t3;
    5104                 :             :              q = t4 >> (post_shift - 1);  */
    5105                 :         932 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    5106                 :         932 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5107                 :         932 :                                           build_int_cst (itype, ml));
    5108                 :         932 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5109                 :             : 
    5110                 :         932 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5111                 :         932 :           def_stmt
    5112                 :         932 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    5113                 :         932 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5114                 :             : 
    5115                 :         932 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5116                 :         932 :           def_stmt
    5117                 :         932 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    5118                 :         932 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5119                 :             : 
    5120                 :         932 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5121                 :         932 :           def_stmt
    5122                 :         932 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    5123                 :             : 
    5124                 :         932 :           if (post_shift != 1)
    5125                 :             :             {
    5126                 :         932 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5127                 :             : 
    5128                 :         932 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5129                 :         932 :               pattern_stmt
    5130                 :         932 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    5131                 :         932 :                                        build_int_cst (itype, post_shift - 1));
    5132                 :             :             }
    5133                 :             :           else
    5134                 :             :             {
    5135                 :             :               q = t4;
    5136                 :             :               pattern_stmt = def_stmt;
    5137                 :             :             }
    5138                 :             :         }
    5139                 :             :       else
    5140                 :             :         {
    5141                 :       14769 :           if (pre_shift >= prec || post_shift >= prec)
    5142                 :             :             return NULL;
    5143                 :             : 
    5144                 :             :           /* t1 = oprnd0 >> pre_shift;
    5145                 :             :              t2 = t1 h* ml;
    5146                 :             :              q = t2 >> post_shift;  */
    5147                 :       14769 :           if (pre_shift)
    5148                 :             :             {
    5149                 :         448 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    5150                 :         448 :               def_stmt
    5151                 :         448 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    5152                 :         448 :                                        build_int_cst (NULL, pre_shift));
    5153                 :         448 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5154                 :             :             }
    5155                 :             :           else
    5156                 :             :             t1 = oprnd0;
    5157                 :             : 
    5158                 :       14769 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5159                 :       14769 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    5160                 :       14769 :                                           build_int_cst (itype, ml));
    5161                 :             : 
    5162                 :       14769 :           if (post_shift)
    5163                 :             :             {
    5164                 :       14763 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5165                 :             : 
    5166                 :       14763 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5167                 :       14763 :               def_stmt
    5168                 :       14763 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    5169                 :       14763 :                                        build_int_cst (itype, post_shift));
    5170                 :             :             }
    5171                 :             :           else
    5172                 :             :             q = t2;
    5173                 :             : 
    5174                 :             :           pattern_stmt = def_stmt;
    5175                 :             :         }
    5176                 :             :     }
    5177                 :             :   else
    5178                 :             :     {
    5179                 :        6661 :       unsigned HOST_WIDE_INT ml;
    5180                 :        6661 :       int post_shift;
    5181                 :        6661 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5182                 :        6661 :       unsigned HOST_WIDE_INT abs_d;
    5183                 :        6661 :       bool add = false;
    5184                 :        6661 :       tree t1, t2, t3, t4;
    5185                 :             : 
    5186                 :             :       /* Give up for -1.  */
    5187                 :        6661 :       if (d == -1)
    5188                 :           0 :         return NULL;
    5189                 :             : 
    5190                 :             :       /* Since d might be INT_MIN, we have to cast to
    5191                 :             :          unsigned HOST_WIDE_INT before negating to avoid
    5192                 :             :          undefined signed overflow.  */
    5193                 :        6661 :       abs_d = (d >= 0
    5194                 :        6661 :                ? (unsigned HOST_WIDE_INT) d
    5195                 :             :                : - (unsigned HOST_WIDE_INT) d);
    5196                 :             : 
    5197                 :             :       /* n rem d = n rem -d */
    5198                 :        6661 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    5199                 :             :         {
    5200                 :           0 :           d = abs_d;
    5201                 :           0 :           oprnd1 = build_int_cst (itype, abs_d);
    5202                 :             :         }
    5203                 :        6661 :       if (HOST_BITS_PER_WIDE_INT >= prec
    5204                 :        6661 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    5205                 :             :         /* This case is not handled correctly below.  */
    5206                 :             :         return NULL;
    5207                 :             : 
    5208                 :        6661 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
    5209                 :        6661 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    5210                 :             :         {
    5211                 :        1472 :           add = true;
    5212                 :        1472 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    5213                 :             :         }
    5214                 :        6661 :       if (post_shift >= prec)
    5215                 :             :         return NULL;
    5216                 :             : 
    5217                 :             :       /* t1 = oprnd0 h* ml;  */
    5218                 :        6661 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5219                 :        6661 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5220                 :        6661 :                                       build_int_cst (itype, ml));
    5221                 :             : 
    5222                 :        6661 :       if (add)
    5223                 :             :         {
    5224                 :             :           /* t2 = t1 + oprnd0;  */
    5225                 :        1472 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5226                 :        1472 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5227                 :        1472 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    5228                 :             :         }
    5229                 :             :       else
    5230                 :             :         t2 = t1;
    5231                 :             : 
    5232                 :        6661 :       if (post_shift)
    5233                 :             :         {
    5234                 :             :           /* t3 = t2 >> post_shift;  */
    5235                 :        6038 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5236                 :        6038 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5237                 :        6038 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    5238                 :        6038 :                                           build_int_cst (itype, post_shift));
    5239                 :             :         }
    5240                 :             :       else
    5241                 :             :         t3 = t2;
    5242                 :             : 
    5243                 :        6661 :       int msb = 1;
    5244                 :        6661 :       int_range_max r;
    5245                 :       13322 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    5246                 :        6661 :       if (!r.varying_p () && !r.undefined_p ())
    5247                 :             :         {
    5248                 :        3922 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    5249                 :             :             msb = 0;
    5250                 :         695 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    5251                 :             :             msb = -1;
    5252                 :             :         }
    5253                 :             : 
    5254                 :        3227 :       if (msb == 0 && d >= 0)
    5255                 :             :         {
    5256                 :             :           /* q = t3;  */
    5257                 :             :           q = t3;
    5258                 :             :           pattern_stmt = def_stmt;
    5259                 :             :         }
    5260                 :             :       else
    5261                 :             :         {
    5262                 :             :           /* t4 = oprnd0 >> (prec - 1);
    5263                 :             :              or if we know from VRP that oprnd0 >= 0
    5264                 :             :              t4 = 0;
    5265                 :             :              or if we know from VRP that oprnd0 < 0
    5266                 :             :              t4 = -1;  */
    5267                 :        3494 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5268                 :        3494 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5269                 :        3494 :           if (msb != 1)
    5270                 :          68 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5271                 :          68 :                                             build_int_cst (itype, msb));
    5272                 :             :           else
    5273                 :        3426 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5274                 :        3426 :                                             build_int_cst (itype, prec - 1));
    5275                 :        3494 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5276                 :             : 
    5277                 :             :           /* q = t3 - t4;  or q = t4 - t3;  */
    5278                 :        3494 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5279                 :        6823 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5280                 :             :                                               d < 0 ? t3 : t4);
    5281                 :             :         }
    5282                 :        6661 :     }
    5283                 :             : 
    5284                 :       22362 :   if (rhs_code == TRUNC_MOD_EXPR)
    5285                 :             :     {
    5286                 :        9282 :       tree r, t1;
    5287                 :             : 
    5288                 :             :       /* We divided.  Now finish by:
    5289                 :             :          t1 = q * oprnd1;
    5290                 :             :          r = oprnd0 - t1;  */
    5291                 :        9282 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5292                 :             : 
    5293                 :        9282 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5294                 :        9282 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5295                 :        9282 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5296                 :             : 
    5297                 :        9282 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5298                 :        9282 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5299                 :             :     }
    5300                 :             : 
    5301                 :             :   /* Pattern detected.  */
    5302                 :       22362 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5303                 :             : 
    5304                 :       22362 :   *type_out = vectype;
    5305                 :       22362 :   return pattern_stmt;
    5306                 :             : }
    5307                 :             : 
    5308                 :             : /* Detects pattern with a modulo operation (S1) where both arguments
    5309                 :             :    are SSA names of INTEGER_TYPE.  The statement is replaced by division,
    5310                 :             :    multiplication, and subtraction.  The last statement (S4) is returned
    5311                 :             :    and *TYPE_OUT is set to the vector type; otherwise NULL is returned.
    5312                 :             : 
    5313                 :             :    Example:
    5314                 :             :    S1 c_t = a_t % b_t;
    5315                 :             : 
    5316                 :             :    is replaced by
    5317                 :             :    S2 x_t = a_t / b_t;
    5318                 :             :    S3 y_t = x_t * b_t;
    5319                 :             :    S4 z_t = a_t - y_t;  */
    5320                 :             : 
    5321                 :             : static gimple *
    5322                 :    29964791 : vect_recog_mod_var_pattern (vec_info *vinfo,
    5323                 :             :                             stmt_vec_info stmt_vinfo, tree *type_out)
    5324                 :             : {
    5325                 :    29964791 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5326                 :    29964791 :   tree oprnd0, oprnd1, vectype, itype;
    5327                 :    29964791 :   gimple *pattern_stmt, *def_stmt;
    5328                 :    29964791 :   enum tree_code rhs_code;
    5329                 :             : 
    5330                 :    29964791 :   if (!is_gimple_assign (last_stmt))
    5331                 :             :     return NULL;
    5332                 :             : 
    5333                 :    20535487 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5334                 :    20535487 :   if (rhs_code != TRUNC_MOD_EXPR)
    5335                 :             :     return NULL;
    5336                 :             : 
    5337                 :       71634 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5338                 :       71634 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5339                 :       71634 :   itype = TREE_TYPE (oprnd0);
    5340                 :       71634 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5341                 :       63590 :       || TREE_CODE (oprnd1) != SSA_NAME
    5342                 :       40812 :       || TREE_CODE (itype) != INTEGER_TYPE)
    5343                 :             :     return NULL;
    5344                 :             : 
    5345                 :       40708 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5346                 :             :   /* Give up if the target has vector mod or lacks div, mult or minus.  */
    5347                 :       40708 :   if (!vectype
    5348                 :       32969 :       || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
    5349                 :       32969 :       || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
    5350                 :           0 :       || !target_has_vecop_for_code (MULT_EXPR, vectype)
    5351                 :       40708 :       || !target_has_vecop_for_code (MINUS_EXPR, vectype))
    5352                 :       40708 :     return NULL;
    5353                 :             :   /* q = oprnd0 / oprnd1; tmp = q * oprnd1; r = oprnd0 - tmp.  */
    5354                 :           0 :   tree q, tmp, r;
    5355                 :           0 :   q = vect_recog_temp_ssa_var (itype, NULL);
    5356                 :           0 :   def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
    5357                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5358                 :             : 
    5359                 :           0 :   tmp = vect_recog_temp_ssa_var (itype, NULL);
    5360                 :           0 :   def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
    5361                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5362                 :             : 
    5363                 :           0 :   r = vect_recog_temp_ssa_var (itype, NULL);
    5364                 :           0 :   pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
    5365                 :             : 
    5366                 :             :   /* Pattern detected.  */
    5367                 :           0 :   *type_out = vectype;
    5368                 :           0 :   vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
    5369                 :             : 
    5370                 :           0 :   return pattern_stmt;
    5371                 :             : }
    5372                 :             : 
    5373                 :             : 
    5374                 :             : /* Return the proper type for converting bool VAR into an integer value,
    5375                 :             :    or NULL_TREE if VAR is not a scalar boolean or has no definition in
    5376                 :             :    VINFO that uses a mask type.  The type is chosen so that the converted
    5377                 :             :    value has the same number of elements as VAR's vector type.  */
    5378                 :             : 
    5379                 :             : static tree
    5380                 :     3498862 : integer_type_for_mask (tree var, vec_info *vinfo)
    5381                 :             : {
    5382                 :     3498862 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5383                 :             :     return NULL_TREE;
    5384                 :             :   /* VAR needs an internal def for which vect_use_mask_type_p holds.  */
    5385                 :     1225902 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    5386                 :     1225902 :   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    5387                 :             :     return NULL_TREE;
    5388                 :             :   /* Unsigned (second argument 1) type of the def's mask precision.  */
    5389                 :      676547 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5390                 :             : }
    5391                 :             : 
    5392                 :             : /* Function vect_recog_gcond_pattern
    5393                 :             : 
    5394                 :             :    Try to find pattern like following:
    5395                 :             : 
    5396                 :             :      if (a op b)
    5397                 :             : 
    5398                 :             :    where operator 'op' is not != and convert it to an adjusted boolean pattern
    5399                 :             : 
    5400                 :             :      mask = a op b
    5401                 :             :      if (mask != 0)
    5402                 :             : 
    5403                 :             :    and set the mask type on MASK.
    5404                 :             : 
    5405                 :             :    Input:
    5406                 :             : 
    5407                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5408                 :             :                  search begins, i.e. the gcond that is to be
    5409                 :             :                  rewritten.
    5410                 :             : 
    5411                 :             :    Output:
    5412                 :             : 
    5413                 :             :    * TYPE_OUT: The mask vector type of the output of this pattern.
    5414                 :             : 
    5415                 :             :    * Return value: A new gcond to replace the pattern, or NULL if no match.  */
    5416                 :             : 
    5417                 :             : static gimple *
    5418                 :    30045752 : vect_recog_gcond_pattern (vec_info *vinfo,
    5419                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5420                 :             : {
    5421                 :             :   /* Currently we only support this for loop vectorization and only when
    5422                 :             :      LOOP_VINFO_EARLY_BREAKS is set (i.e. with multiple exits).  */
    5423                 :    30045752 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5424                 :     3336372 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5425                 :             :     return NULL;
    5426                 :             : 
    5427                 :     1081104 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5428                 :     1081104 :   gcond* cond = NULL;
    5429                 :    30065373 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5430                 :             :     return NULL;
    5431                 :             : 
    5432                 :      283317 :   auto lhs = gimple_cond_lhs (cond);
    5433                 :      283317 :   auto rhs = gimple_cond_rhs (cond);
    5434                 :      283317 :   auto code = gimple_cond_code (cond);
    5435                 :             :   /* Conditions whose operands have vector type are not handled.  */
    5436                 :      283317 :   tree scalar_type = TREE_TYPE (lhs);
    5437                 :      283317 :   if (VECTOR_TYPE_P (scalar_type))
    5438                 :             :     return NULL;
    5439                 :             : 
    5440                 :             :   /* If the input is a boolean then try to figure out the precision that the
    5441                 :             :      vector type should use.  We cannot use the scalar precision as this would
    5442                 :             :      later mismatch.  This is similar to what recog_bool does.  */
    5443                 :      283317 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5444                 :             :     {
    5445                 :        8546 :       if (tree stype = integer_type_for_mask (lhs, vinfo))
    5446                 :      283317 :         scalar_type = stype;
    5447                 :             :     }
    5448                 :             : 
    5449                 :      283317 :   tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
    5450                 :      283317 :   if (vectype == NULL_TREE)
    5451                 :             :     return NULL;
    5452                 :             :   /* Build mask = lhs CODE rhs (CODE is taken unchanged from the gcond).  */
    5453                 :      263696 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5454                 :      263696 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5455                 :      263696 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5456                 :             :   /* The replacement condition is if (mask != 0), with no labels set.  */
    5457                 :      263696 :   gimple *pattern_stmt
    5458                 :      263696 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5459                 :      263696 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5460                 :             :                          NULL_TREE, NULL_TREE);
    5461                 :      263696 :   *type_out = vectype;
    5462                 :      263696 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5463                 :      263696 :   return pattern_stmt;
    5464                 :             : }
    5465                 :             : 
    5466                 :             : /* Function vect_recog_bool_pattern
    5467                 :             : 
    5468                 :             :    Try to find pattern like following:
    5469                 :             : 
    5470                 :             :      bool a_b, b_b, c_b, d_b, e_b;
    5471                 :             :      TYPE f_T;
    5472                 :             :    loop:
    5473                 :             :      S1  a_b = x1 CMP1 y1;
    5474                 :             :      S2  b_b = x2 CMP2 y2;
    5475                 :             :      S3  c_b = a_b & b_b;
    5476                 :             :      S4  d_b = x3 CMP3 y3;
    5477                 :             :      S5  e_b = c_b | d_b;
    5478                 :             :      S6  f_T = (TYPE) e_b;
    5479                 :             : 
    5480                 :             :    where type 'TYPE' is an integral type.  Or a similar pattern
    5481                 :             :    ending in
    5482                 :             : 
    5483                 :             :      S6  f_Y = e_b ? r_Y : s_Y;
    5484                 :             : 
    5485                 :             :    as results from if-conversion of a complex condition.
    5486                 :             : 
    5487                 :             :    Input:
    5488                 :             : 
    5489                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5490                 :             :                  search begins, i.e. cast of a bool to
    5491                 :             :                  an integer type.
    5492                 :             : 
    5493                 :             :    Output:
    5494                 :             : 
    5495                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5496                 :             : 
    5497                 :             :    * Return value: A new stmt that will be used to replace the pattern.
    5498                 :             : 
    5499                 :             :         Assuming size of TYPE is the same as size of all comparisons
    5500                 :             :         (otherwise some casts would be added where needed), for the
    5501                 :             :         above sequence we create the related pattern stmts:
    5502                 :             :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5503                 :             :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5504                 :             :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5505                 :             :         S5'  e_T = c_T | d_T;
    5506                 :             :         S6'  f_T = e_T;
    5507                 :             : 
    5508                 :             :         Instead of the above S3' we could emit:
    5509                 :             :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5510                 :             :         S3'  c_T = a_T & b_T;
    5511                 :             :         but the above is more efficient.  */
    5512                 :             : 
    5513                 :             : static gimple *
    5514                 :    30045752 : vect_recog_bool_pattern (vec_info *vinfo,
    5515                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5516                 :             : {
    5517                 :    30045752 :   gimple *last_stmt = stmt_vinfo->stmt;
    5518                 :    30045752 :   enum tree_code rhs_code;
    5519                 :    30045752 :   tree var, lhs, rhs, vectype;
    5520                 :    30045752 :   gimple *pattern_stmt;
    5521                 :             : 
    5522                 :    30045752 :   if (!is_gimple_assign (last_stmt))
    5523                 :             :     return NULL;
    5524                 :             : 
    5525                 :    20880144 :   var = gimple_assign_rhs1 (last_stmt);
    5526                 :    20880144 :   lhs = gimple_assign_lhs (last_stmt);
    5527                 :    20880144 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5528                 :             : 
    5529                 :    20880144 :   if (rhs_code == VIEW_CONVERT_EXPR)
    5530                 :      170285 :     var = TREE_OPERAND (var, 0);
    5531                 :             : 
    5532                 :    20880144 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5533                 :             :     return NULL;
    5534                 :             : 
    5535                 :      729447 :   hash_set<gimple *> bool_stmts;  /* NOTE(review): not referenced again in this function.  */
    5536                 :             : 
    5537                 :      729447 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    5538                 :      636747 :       || rhs_code == VIEW_CONVERT_EXPR)
    5539                 :             :     {
    5540                 :      192762 :       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    5541                 :      192631 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    5542                 :             :         return NULL;
    5543                 :       92759 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5544                 :             : 
    5545                 :       92759 :       tree type = integer_type_for_mask (var, vinfo);
    5546                 :       92759 :       tree cst0, cst1, tmp;
    5547                 :             : 
    5548                 :       92759 :       if (!type)
    5549                 :             :         return NULL;
    5550                 :             : 
    5551                 :             :       /* We may directly use a cond with the narrowed type to avoid multiple
    5552                 :             :          cond exprs followed by result packing, and perform a single cond
    5553                 :             :          with a packed mask instead.  In case of widening we better do the
    5554                 :             :          cond first and then extract the results.  */
    5555                 :       45848 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    5556                 :       31246 :         type = TREE_TYPE (lhs);
    5557                 :             : 
    5558                 :       45848 :       cst0 = build_int_cst (type, 0);
    5559                 :       45848 :       cst1 = build_int_cst (type, 1);
    5560                 :       45848 :       tmp = vect_recog_temp_ssa_var (type, NULL);
    5561                 :       45848 :       pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    5562                 :             : 
    5563                 :       45848 :       if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    5564                 :             :         {
    5565                 :       14602 :           tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5566                 :       14602 :           append_pattern_def_seq (vinfo, stmt_vinfo,
    5567                 :             :                                   pattern_stmt, new_vectype);
    5568                 :             : 
    5569                 :       14602 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5570                 :       14602 :           pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
    5571                 :             :         }
    5572                 :             : 
    5573                 :       45848 :       *type_out = vectype;
    5574                 :       45848 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5575                 :             : 
    5576                 :       45848 :       return pattern_stmt;
    5577                 :             :     }
    5578                 :      632717 :   else if (rhs_code == COND_EXPR
    5579                 :      173729 :            && TREE_CODE (var) == SSA_NAME)
    5580                 :             :     {
    5581                 :      173729 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5582                 :      173729 :       if (vectype == NULL_TREE)
    5583                 :             :         return NULL;
    5584                 :             : 
    5585                 :             :       /* Build a scalar type for the boolean result that when
    5586                 :             :          vectorized matches the vector type of the result in
    5587                 :             :          size and number of elements.  */
    5588                 :      160336 :       unsigned prec
    5589                 :      160336 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    5590                 :             :                                TYPE_VECTOR_SUBPARTS (vectype));
    5591                 :             : 
    5592                 :      160336 :       tree type
    5593                 :      320672 :         = build_nonstandard_integer_type (prec,
    5594                 :      160336 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    5595                 :      160336 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    5596                 :             :         return NULL;
    5597                 :             : 
    5598                 :      160336 :       enum vect_def_type dt;
    5599                 :      160336 :       if (integer_type_for_mask (var, vinfo))
    5600                 :             :         return NULL;
    5601                 :       27244 :       else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
    5602                 :       27244 :                && vect_is_simple_use (var, vinfo, &dt)
    5603                 :       27244 :                && (dt == vect_external_def
    5604                 :       27237 :                    || dt == vect_constant_def))
    5605                 :             :         {
    5606                 :             :           /* If the condition is an external or constant boolean, convert it to
    5607                 :             :              a mask of the given integer type but don't set a vectype.  */
    5608                 :        1229 :           tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
    5609                 :        1229 :           pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
    5610                 :             :                                               build_all_ones_cst (type),
    5611                 :             :                                               build_zero_cst (type));
    5612                 :        1229 :           append_inv_pattern_def_seq (vinfo, pattern_stmt);
    5613                 :        1229 :           var = lhs_ivar;
    5614                 :             :         }
    5615                 :             : 
    5616                 :       27244 :       tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5617                 :       27244 :       pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
    5618                 :       27244 :                                           build_zero_cst (TREE_TYPE (var)));
    5619                 :             : 
    5620                 :       27244 :       tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
    5621                 :       27244 :       if (!new_vectype)
    5622                 :             :         return NULL;
    5623                 :             : 
    5624                 :       27244 :       new_vectype = truth_type_for (new_vectype);
    5625                 :       27244 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
    5626                 :       27244 :                               TREE_TYPE (var));
    5627                 :             : 
    5628                 :       27244 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5629                 :       27244 :       pattern_stmt
    5630                 :       27244 :         = gimple_build_assign (lhs, COND_EXPR, lhs_var,
    5631                 :             :                                gimple_assign_rhs2 (last_stmt),
    5632                 :             :                                gimple_assign_rhs3 (last_stmt));
    5633                 :       27244 :       *type_out = vectype;
    5634                 :       27244 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5635                 :             : 
    5636                 :       27244 :       return pattern_stmt;
    5637                 :             :     }
    5638                 :      458988 :   else if (rhs_code == SSA_NAME
    5639                 :       29235 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    5640                 :             :     {
    5641                 :        7914 :       stmt_vec_info pattern_stmt_info;
    5642                 :        7914 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5643                 :        7914 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    5644                 :           0 :         return NULL;
    5645                 :             : 
    5646                 :        7914 :       tree type = integer_type_for_mask (var, vinfo);
    5647                 :        7914 :       tree cst0, cst1, new_vectype;
    5648                 :             : 
    5649                 :        7914 :       if (!type)
    5650                 :             :         return NULL;
    5651                 :             : 
    5652                 :        4448 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
    5653                 :         541 :         type = TREE_TYPE (vectype);
    5654                 :             : 
    5655                 :        4448 :       cst0 = build_int_cst (type, 0);
    5656                 :        4448 :       cst1 = build_int_cst (type, 1);
    5657                 :        4448 :       new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5658                 :             : 
    5659                 :        4448 :       rhs = vect_recog_temp_ssa_var (type, NULL);
    5660                 :        4448 :       pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    5661                 :        4448 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
    5662                 :             : 
    5663                 :        4448 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    5664                 :        4448 :       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    5665                 :             :         {
    5666                 :        3907 :           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5667                 :        3907 :           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
    5668                 :        3907 :           append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
    5669                 :        3907 :           rhs = rhs2;
    5670                 :             :         }
    5671                 :        4448 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    5672                 :        4448 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5673                 :        4448 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5674                 :        4448 :       *type_out = vectype;
    5675                 :        4448 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5676                 :             : 
    5677                 :        4448 :       return pattern_stmt;
    5678                 :             :     }
    5679                 :             :   else
    5680                 :             :     return NULL;
    5681                 :      729447 : }
    5682                 :             : 
    5683                 :             : /* A helper for vect_recog_mask_conversion_pattern.  Build a
    5684                 :             :    conversion of MASK to the mask type that truth_type_for gives
    5685                 :             :    for VECTYPE.  The built statement is recorded with that mask
    5686                 :             :    type and appended to the pattern def sequence of STMT_VINFO.
    5687                 :             : 
    5688                 :             :    Return the new temporary holding the converted mask.  */
    5689                 :             : 
    5690                 :             : static tree
    5691                 :       53432 : build_mask_conversion (vec_info *vinfo,
    5692                 :             :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5693                 :             : {
    5694                 :       53432 :   gimple *stmt;
    5695                 :       53432 :   tree masktype, tmp;
    5696                 :             : 
    5697                 :       53432 :   masktype = truth_type_for (vectype);
    5698                 :       53432 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5699                 :       53432 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5700                 :       53432 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5701                 :       53432 :                           stmt, masktype, TREE_TYPE (vectype));
    5702                 :             : 
    5703                 :       53432 :   return tmp;
    5704                 :             : }
    5705                 :             : 
    5706                 :             : 
    5707                 :             : /* Function vect_recog_mask_conversion_pattern
    5708                 :             : 
    5709                 :             :    Try to find statements which require boolean type
    5710                 :             :    conversion.  Additional conversion statements are
    5711                 :             :    added to handle such cases.  For example:
    5712                 :             : 
    5713                 :             :    bool m_1, m_2, m_3;
    5714                 :             :    int i_4, i_5;
    5715                 :             :    double d_6, d_7;
    5716                 :             :    char c_1, c_2, c_3;
    5717                 :             : 
    5718                 :             :    S1   m_1 = i_4 > i_5;
    5719                 :             :    S2   m_2 = d_6 < d_7;
    5720                 :             :    S3   m_3 = m_1 & m_2;
    5721                 :             :    S4   c_1 = m_3 ? c_2 : c_3;
    5722                 :             : 
    5723                 :             :    Will be transformed into:
    5724                 :             : 
    5725                 :             :    S1   m_1 = i_4 > i_5;
    5726                 :             :    S2   m_2 = d_6 < d_7;
    5727                 :             :    S3'' m_2' = (_Bool[bitsize=32])m_2
    5728                 :             :    S3'  m_3' = m_1 & m_2';
    5729                 :             :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    5730                 :             :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    5731                 :             : 
    5732                 :             : static gimple *
    5733                 :    30018492 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    5734                 :             :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    5735                 :             : {
    5736                 :    30018492 :   gimple *last_stmt = stmt_vinfo->stmt;
    5737                 :    30018492 :   enum tree_code rhs_code;
    5738                 :    30018492 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    5739                 :    30018492 :   tree vectype1, vectype2;
    5740                 :    30018492 :   stmt_vec_info pattern_stmt_info;
    5741                 :             : 
    5742                 :             :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    5743                 :             :      conversion.  */
    5744                 :    30018492 :   if (is_gimple_call (last_stmt)
    5745                 :    30018492 :       && gimple_call_internal_p (last_stmt))
    5746                 :             :     {
    5747                 :       90438 :       gcall *pattern_stmt;
    5748                 :             : 
    5749                 :       90438 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    5750                 :       90438 :       int mask_argno = internal_fn_mask_index (ifn);
    5751                 :       90438 :       if (mask_argno < 0)
    5752                 :             :         return NULL;
    5753                 :             : 
    5754                 :        8721 :       bool store_p = internal_store_fn_p (ifn);
    5755                 :        8721 :       bool load_p = internal_store_fn_p (ifn);
    5756                 :        8721 :       if (store_p)
    5757                 :             :         {
    5758                 :        1577 :           int rhs_index = internal_fn_stored_value_index (ifn);
    5759                 :        1577 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    5760                 :        1577 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    5761                 :             :         }
    5762                 :             :       else
    5763                 :             :         {
    5764                 :        7144 :           lhs = gimple_call_lhs (last_stmt);
    5765                 :        7144 :           if (!lhs)
    5766                 :             :             return NULL;
    5767                 :        7144 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5768                 :             :         }
    5769                 :             : 
    5770                 :        8721 :       if (!vectype1)
    5771                 :             :         return NULL;
    5772                 :             : 
    5773                 :        8577 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    5774                 :        8577 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    5775                 :        8577 :       if (mask_arg_type)
    5776                 :             :         {
    5777                 :        7623 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    5778                 :             : 
    5779                 :        7623 :           if (!vectype2
    5780                 :        7623 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    5781                 :             :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    5782                 :        4489 :             return NULL;
    5783                 :             :         }
    5784                 :         954 :       else if (store_p || load_p)
    5785                 :             :         return NULL;
    5786                 :             : 
    5787                 :        3889 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    5788                 :             : 
    5789                 :        3889 :       auto_vec<tree, 8> args;
    5790                 :        3889 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    5791                 :        3889 :       args.safe_grow (nargs, true);
    5792                 :       19445 :       for (unsigned int i = 0; i < nargs; ++i)
    5793                 :       15556 :         args[i] = ((int) i == mask_argno
    5794                 :       15556 :                    ? tmp
    5795                 :       11667 :                    : gimple_call_arg (last_stmt, i));
    5796                 :        3889 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    5797                 :             : 
    5798                 :        3889 :       if (!store_p)
    5799                 :             :         {
    5800                 :        3635 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5801                 :        3635 :           gimple_call_set_lhs (pattern_stmt, lhs);
    5802                 :             :         }
    5803                 :             : 
    5804                 :        3635 :       if (load_p || store_p)
    5805                 :         254 :         gimple_call_set_nothrow (pattern_stmt, true);
    5806                 :             : 
    5807                 :        3889 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5808                 :        3889 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    5809                 :        1533 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5810                 :             : 
    5811                 :        3889 :       *type_out = vectype1;
    5812                 :        3889 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5813                 :             : 
    5814                 :        3889 :       return pattern_stmt;
    5815                 :        3889 :     }
    5816                 :             : 
    5817                 :    29928054 :   if (!is_gimple_assign (last_stmt))
    5818                 :             :     return NULL;
    5819                 :             : 
    5820                 :    20852884 :   gimple *pattern_stmt;
    5821                 :    20852884 :   lhs = gimple_assign_lhs (last_stmt);
    5822                 :    20852884 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    5823                 :    20852884 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5824                 :             : 
    5825                 :             :   /* Check for cond expression requiring mask conversion.  */
    5826                 :    20852884 :   if (rhs_code == COND_EXPR)
    5827                 :             :     {
    5828                 :      165634 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5829                 :             : 
    5830                 :      165634 :       gcc_assert (! COMPARISON_CLASS_P (rhs1));
    5831                 :      165634 :       if (TREE_CODE (rhs1) == SSA_NAME)
    5832                 :             :         {
    5833                 :      165634 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    5834                 :      165634 :           if (!rhs1_type)
    5835                 :             :             return NULL;
    5836                 :             :         }
    5837                 :             :       else
    5838                 :             :         return NULL;
    5839                 :             : 
    5840                 :      153960 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    5841                 :             : 
    5842                 :      153960 :       if (!vectype1 || !vectype2)
    5843                 :             :         return NULL;
    5844                 :             : 
    5845                 :             :       /* Continue if a conversion is needed.  Also continue if we have
    5846                 :             :          a comparison whose vector type would normally be different from
    5847                 :             :          VECTYPE2 when considered in isolation.  In that case we'll
    5848                 :             :          replace the comparison with an SSA name (so that we can record
    5849                 :             :          its vector type) and behave as though the comparison was an SSA
    5850                 :             :          name from the outset.  */
    5851                 :      152221 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    5852                 :             :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    5853                 :             :         return NULL;
    5854                 :             : 
    5855                 :       30456 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    5856                 :       60912 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    5857                 :       30456 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    5858                 :             :       else
    5859                 :             :         tmp = rhs1;
    5860                 :             : 
    5861                 :       30456 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5862                 :       30456 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    5863                 :             :                                           gimple_assign_rhs2 (last_stmt),
    5864                 :             :                                           gimple_assign_rhs3 (last_stmt));
    5865                 :             : 
    5866                 :       30456 :       *type_out = vectype1;
    5867                 :       30456 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5868                 :             : 
    5869                 :       30456 :       return pattern_stmt;
    5870                 :             :     }
    5871                 :             : 
    5872                 :             :   /* Now check for binary boolean operations requiring conversion for
    5873                 :             :      one of operands.  */
    5874                 :    20687250 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    5875                 :             :     return NULL;
    5876                 :             : 
    5877                 :     1657643 :   if (rhs_code != BIT_IOR_EXPR
    5878                 :             :       && rhs_code != BIT_XOR_EXPR
    5879                 :     1657643 :       && rhs_code != BIT_AND_EXPR
    5880                 :     1303874 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    5881                 :             :     return NULL;
    5882                 :             : 
    5883                 :     1527548 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    5884                 :             : 
    5885                 :     1527548 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    5886                 :     1527548 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    5887                 :             : 
    5888                 :     1527548 :   if (!rhs1_type || !rhs2_type
    5889                 :     1527548 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    5890                 :             :     return NULL;
    5891                 :             : 
    5892                 :       19087 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    5893                 :             :     {
    5894                 :       11970 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    5895                 :       11970 :       if (!vectype1)
    5896                 :             :         return NULL;
    5897                 :       11970 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    5898                 :             :     }
    5899                 :             :   else
    5900                 :             :     {
    5901                 :        7117 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    5902                 :        7117 :       if (!vectype1)
    5903                 :             :         return NULL;
    5904                 :        7117 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    5905                 :             :     }
    5906                 :             : 
    5907                 :       19087 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5908                 :       19087 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    5909                 :             : 
    5910                 :       19087 :   *type_out = vectype1;
    5911                 :       19087 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5912                 :             : 
    5913                 :       19087 :   return pattern_stmt;
    5914                 :             : }
    5915                 :             : 
    5916                 :             : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    5917                 :             :    the boolean condition under which it occurs, otherwise return null.  */
    5918                 :             : 
    5919                 :             : static tree
    5920                 :       32844 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    5921                 :             : {
    5922                 :       32844 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    5923                 :             :     { /* Assignments are treated as unconditional: no mask is returned.  */
    5924                 :       31402 :       gcc_assert (gimple_assign_single_p (def_assign));
    5925                 :             :       return NULL_TREE;
    5926                 :             :     }
    5927                 :             :   /* For a call, the mask is the argument at the index that internal_fn_mask_index gives for its internal function.  */
    5928                 :        1442 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    5929                 :             :     {
    5930                 :        1442 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    5931                 :        1442 :       int mask_index = internal_fn_mask_index (ifn);
    5932                 :        1442 :       return gimple_call_arg (def_call, mask_index);
    5933                 :             :     }
    5934                 :             :   /* Only assignments and calls are expected here.  */
    5935                 :           0 :   gcc_unreachable ();
    5936                 :             : }
    5937                 :             : 
    5938                 :             : /* Return MASK if MASK is suitable for masking an operation on vectors
    5939                 :             :    of type VECTYPE, otherwise convert it into such a form and return
    5940                 :             :    the result.  Associate any conversion statements with STMT_INFO's
    5941                 :             :    pattern.  VINFO is forwarded to the helper routines.  */
    5942                 :             : 
    5943                 :             : static tree
    5944                 :           0 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    5945                 :             :                                stmt_vec_info stmt_info, vec_info *vinfo)
    5946                 :             : {
    5947                 :           0 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    5948                 :           0 :   if (mask_type)
    5949                 :             :     { /* Convert only if the mask's own vector type may differ from VECTYPE in its number of subparts.  */
    5950                 :           0 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    5951                 :           0 :       if (mask_vectype
    5952                 :           0 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    5953                 :           0 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    5954                 :           0 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    5955                 :             :     }
    5956                 :           0 :   return mask; /* Unchanged when no mask type or mask vector type was found, or no conversion was needed.  */
    5957                 :             : }
    5958                 :             : 
    5959                 :             : /* Return the equivalent of:
    5960                 :             : 
    5961                 :             :      fold_convert (TYPE, VALUE)
    5962                 :             : 
    5963                 :             :    with the expectation that the operation will be vectorized.
    5964                 :             :    If new statements are needed, add them as pattern statements
    5965                 :             :    to STMT_INFO.  */
    5966                 :             : 
    5967                 :             : static tree
    5968                 :           0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    5969                 :             :                                 tree type, tree value, stmt_vec_info stmt_info)
    5970                 :             : { /* VALUE is returned as-is when useless_type_conversion_p says no conversion to TYPE is needed.  */
    5971                 :           0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    5972                 :             :     return value;
    5973                 :             :   /* Otherwise add NEW_VALUE = (TYPE) VALUE to STMT_INFO's pattern definition sequence, using the vector type for TYPE.  */
    5974                 :           0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    5975                 :           0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    5976                 :           0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,
    5977                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    5978                 :           0 :   return new_value;
    5979                 :             : }
    5980                 :             : 
    5981                 :             : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    5982                 :             :    internal function.  Return the final statement on success and set
    5983                 :             :    *TYPE_OUT to the vector type being loaded or stored.
    5984                 :             : 
    5985                 :             :    This function only handles gathers and scatters that were recognized
    5986                 :             :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  Return NULL when the pattern does not apply.  */
    5987                 :             : 
    5988                 :             : static gimple *
    5989                 :    30018492 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    5990                 :             :                                    stmt_vec_info stmt_info, tree *type_out)
    5991                 :             : {
    5992                 :             :   /* Currently we only support this for loop vectorization.  */
    5993                 :    33354319 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5994                 :     3335827 :   if (!loop_vinfo)
    5995                 :             :     return NULL;
    5996                 :             : 
    5997                 :             :   /* Make sure that we're looking at a gather load or scatter store.  */
    5998                 :     3335827 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    5999                 :     3335827 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    6000                 :             :     return NULL;
    6001                 :             : 
    6002                 :             :   /* Get the boolean that controls whether the load or store happens.
    6003                 :             :      This is null if the operation is unconditional.  */
    6004                 :       32844 :   tree mask = vect_get_load_store_mask (stmt_info);
    6005                 :             : 
    6006                 :             :   /* Make sure that the target supports an appropriate internal
    6007                 :             :      function for the gather/scatter operation.  */
    6008                 :       32844 :   gather_scatter_info gs_info;
    6009                 :       32844 :   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
    6010                 :       32844 :       || gs_info.ifn == IFN_LAST)
    6011                 :             :     return NULL;
    6012                 :             : 
    6013                 :             :   /* Convert the mask to the right form.  With no condition, the MASK_* functions still get a mask operand: the constant -1 of the truth type's element type.  */
    6014                 :           0 :   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
    6015                 :             :                                                  gs_info.element_type);
    6016                 :           0 :   if (mask)
    6017                 :           0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    6018                 :             :                                           loop_vinfo);
    6019                 :           0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6020                 :           0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6021                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6022                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6023                 :           0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
    6024                 :             : 
    6025                 :             :   /* Get the invariant base and non-invariant offset, converting the
    6026                 :             :      latter to the same width as the vector elements.  */
    6027                 :           0 :   tree base = gs_info.base;
    6028                 :           0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6029                 :           0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6030                 :             :                                                 gs_info.offset, stmt_info);
    6031                 :             : 
    6032                 :             :   /* Build the new pattern statement.  */
    6033                 :           0 :   tree scale = size_int (gs_info.scale);
    6034                 :           0 :   gcall *pattern_stmt;
    6035                 :             : 
    6036                 :           0 :   if (DR_IS_READ (dr))
    6037                 :             :     { /* Load: the call's result gets a fresh temporary of the original lhs's type.  */
    6038                 :           0 :       tree zero = build_zero_cst (gs_info.element_type);
    6039                 :           0 :       if (mask != NULL)
    6040                 :             :         {
    6041                 :           0 :           int elsval = MASK_LOAD_ELSE_ZERO;
    6042                 :             : 
    6043                 :           0 :           tree vec_els
    6044                 :           0 :             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
    6045                 :           0 :           pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
    6046                 :             :                                                      offset, scale, zero, mask,
    6047                 :             :                                                      vec_els);
    6048                 :             :         }
    6049                 :             :       else
    6050                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
    6051                 :             :                                                    offset, scale, zero);
    6052                 :           0 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
    6053                 :           0 :       tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6054                 :           0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6055                 :             :     }
    6056                 :             :   else
    6057                 :             :     { /* Store: the value being stored becomes an operand of the call.  */
    6058                 :           0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6059                 :           0 :       if (mask != NULL)
    6060                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
    6061                 :             :                                                    base, offset, scale, rhs,
    6062                 :             :                                                    mask);
    6063                 :             :       else
    6064                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
    6065                 :             :                                                    base, offset, scale, rhs);
    6066                 :             :     }
    6067                 :           0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6068                 :             : 
    6069                 :             :   /* Copy across relevant vectorization info and associate DR with the
    6070                 :             :      new pattern statement instead of the original statement.  */
    6071                 :           0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6072                 :           0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6073                 :             :   /* The reported type is the vector type already recorded on STMT_INFO.  */
    6074                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    6075                 :           0 :   *type_out = vectype;
    6076                 :           0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6077                 :             : 
    6078                 :           0 :   return pattern_stmt;
    6079                 :             : }
    6080                 :             : 
    6081                 :             : /* Helper function for vect_recog_cond_store_pattern.  Check whether COND_ARG
    6082                 :             :    is defined by a load statement that reads the same data reference as
    6083                 :             :    STORE_VINFO.  */
    6084                 :             : 
    6085                 :             : static bool
    6086                 :       27861 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
    6087                 :             :                                   stmt_vec_info store_vinfo, tree cond_arg)
    6088                 :             : {
    6089                 :       27861 :   stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
    6090                 :       27861 :   if (!load_stmt_vinfo
    6091                 :       15631 :       || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
    6092                 :       10479 :       || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
    6093                 :       38340 :       || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
    6094                 :             :                           STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    6095                 :       20071 :     return false;
    6096                 :             :   /* COND_ARG's definition is a read whose data reference matches the store's.  */
    6097                 :             :   return true;
    6098                 :             : }
    6099                 :             : 
    6100                 :             : /* Function vect_recog_cond_store_pattern
    6101                 :             : 
    6102                 :             :    Try to find the following pattern:
    6103                 :             : 
    6104                 :             :    x = *_3;
    6105                 :             :    c = a CMP b;
    6106                 :             :    y = c ? t_20 : x;
    6107                 :             :    *_3 = y;
    6108                 :             : 
    6109                 :             :    where the store of _3 happens on a conditional select on a value loaded
    6110                 :             :    from the same location.  In such case we can elide the initial load if
    6111                 :             :    MASK_STORE is supported and instead only conditionally write out the result.
    6112                 :             : 
    6113                 :             :    The pattern produces for the above:
    6114                 :             : 
    6115                 :             :    c = a CMP b;
    6116                 :             :    .MASK_STORE (_3, c, t_20)
    6117                 :             : 
    6118                 :             :    Input:
    6119                 :             : 
    6120                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    6121                 :             :    example, the search begins at the store "*_3 = y".
    6122                 :             : 
    6123                 :             :    Output:
    6124                 :             : 
    6125                 :             :    * TYPE_OUT: The type of the output of this pattern.
    6126                 :             : 
    6127                 :             :    * Return value: A new stmt that will be used to replace the sequence.  */
    6128                 :             : 
    6129                 :             : static gimple *
    6130                 :    30018492 : vect_recog_cond_store_pattern (vec_info *vinfo,
    6131                 :             :                                stmt_vec_info stmt_vinfo, tree *type_out)
    6132                 :             : {
    6133                 :    30018492 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6134                 :     3335827 :   if (!loop_vinfo)
    6135                 :             :     return NULL;
    6136                 :             : 
    6137                 :     3335827 :   gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
    6138                 :             : 
    6139                 :             :   /* Needs to be a gimple store where we have DR info for.  */
    6140                 :     3335827 :   if (!STMT_VINFO_DATA_REF (stmt_vinfo)
    6141                 :      780162 :       || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
    6142                 :     3614379 :       || !gimple_store_p (store_stmt))
    6143                 :     3058852 :     return NULL;
    6144                 :             : 
    6145                 :      276975 :   tree st_rhs = gimple_assign_rhs1 (store_stmt);
    6146                 :             : 
    6147                 :      276975 :   if (TREE_CODE (st_rhs) != SSA_NAME)
    6148                 :             :     return NULL;
    6149                 :             : 
    6150                 :      227754 :   auto cond_vinfo = vinfo->lookup_def (st_rhs);
    6151                 :             : 
    6152                 :             :   /* If the condition isn't part of the loop then bool recog wouldn't have seen
    6153                 :             :      it and so this transformation may not be valid.  */
    6154                 :      227754 :   if (!cond_vinfo)
    6155                 :             :     return NULL;
    6156                 :             : 
    6157                 :      213555 :   cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
    6158                 :    30223177 :   gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
    6159                 :      267735 :   if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    6160                 :             :     return NULL;
    6161                 :             : 
    6162                 :             :   /* Check if the else value matches the original loaded one.  */
    6163                 :       14571 :   bool invert = false;
    6164                 :       14571 :   tree cmp_ls = gimple_arg (cond_stmt, 0);
    6165                 :       14571 :   if (TREE_CODE (cmp_ls) != SSA_NAME)
    6166                 :             :     return NULL;
    6167                 :             : 
    6168                 :       14571 :   tree cond_arg1 = gimple_arg (cond_stmt, 1);
    6169                 :       14571 :   tree cond_arg2 = gimple_arg (cond_stmt, 2);
    6170                 :             :   /* The reloaded value may be either arm of the select; INVERT is set when it is argument 1 rather than argument 2.  */
    6171                 :       14571 :   if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
    6172                 :       14571 :       && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
    6173                 :             :                                                       cond_arg1)))
    6174                 :             :     return NULL;
    6175                 :             : 
    6176                 :        7790 :   vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
    6177                 :             :   /* NOTE(review): the detection is reported above before the remaining checks, any of which can still return NULL.  */
    6178                 :        7790 :   tree scalar_type = TREE_TYPE (st_rhs);
    6179                 :        7790 :   if (VECTOR_TYPE_P (scalar_type))
    6180                 :             :     return NULL;
    6181                 :             : 
    6182                 :        7790 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6183                 :        7790 :   if (vectype == NULL_TREE)
    6184                 :             :     return NULL;
    6185                 :             :   /* Give up unless VECTYPE has a vector mode, the target does not flag IFN_MASK_STORE as expensive, and a mask mode passing can_vec_mask_load_store_p exists.  */
    6186                 :        7790 :   machine_mode mask_mode;
    6187                 :        7790 :   machine_mode vecmode = TYPE_MODE (vectype);
    6188                 :        1644 :   if (!VECTOR_MODE_P (vecmode)
    6189                 :        7790 :       || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
    6190                 :           0 :       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    6191                 :        7790 :       || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    6192                 :        7790 :     return NULL;
    6193                 :             : 
    6194                 :           0 :   tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
    6195                 :           0 :   if (may_be_nonaddressable_p (base))
    6196                 :             :     return NULL;
    6197                 :             : 
    6198                 :             :   /* We need to use the false parameter of the conditional select.  */
    6199                 :           0 :   tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
    6200                 :           0 :   tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
    6201                 :           0 :   gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
    6202                 :             : 
    6203                 :             :   /* This is a rough estimation to check that there aren't any aliasing stores
    6204                 :             :      in between the load and store.  It's a bit strict, but for now it's good
    6205                 :             :      enough.  */
    6206                 :           0 :   if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    6207                 :             :     return NULL;
    6208                 :             : 
    6209                 :             :   /* If we have to invert the condition, i.e. use the true argument rather than
    6210                 :             :      the false argument, we have to negate the mask.  */
    6211                 :           0 :   if (invert)
    6212                 :             :     {
    6213                 :           0 :       tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6214                 :             : 
    6215                 :             :       /* Invert the mask using ^ 1.  */
    6216                 :           0 :       tree itype = TREE_TYPE (cmp_ls);
    6217                 :           0 :       gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
    6218                 :             :                                            build_int_cst (itype, 1));
    6219                 :             : 
    6220                 :           0 :       tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
    6221                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
    6222                 :           0 :       cmp_ls= var;
    6223                 :             :     }
    6224                 :             : 
    6225                 :           0 :   if (TREE_CODE (base) != MEM_REF)
    6226                 :           0 :    base = build_fold_addr_expr (base);
    6227                 :             :   /* PTR is a constant of BASE's alias pointer type holding BASE's object alignment.  */
    6228                 :           0 :   tree ptr = build_int_cst (reference_alias_ptr_type (base),
    6229                 :           0 :                             get_object_alignment (base));
    6230                 :             : 
    6231                 :             :   /* Convert the mask to the right form.  */
    6232                 :           0 :   tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
    6233                 :             :                                              vinfo);
    6234                 :             : 
    6235                 :           0 :   gcall *call
    6236                 :           0 :     = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
    6237                 :             :                                   cond_store_arg);
    6238                 :           0 :   gimple_set_location (call, gimple_location (store_stmt));
    6239                 :             : 
    6240                 :             :   /* Copy across relevant vectorization info and associate DR with the
    6241                 :             :      new pattern statement instead of the original statement.  */
    6242                 :           0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
    6243                 :           0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6244                 :             : 
    6245                 :           0 :   *type_out = vectype;
    6246                 :           0 :   return call;
    6247                 :             : }
    6248                 :             : 
    6249                 :             : /* Return true if TYPE is a non-boolean integer type.  These are the types
    6250                 :             :    that we want to consider for narrowing.  */
    6251                 :             : 
    6252                 :             : static bool
    6253                 :    60307150 : vect_narrowable_type_p (tree type)
    6254                 :             : { /* Integral types, minus those matching VECT_SCALAR_BOOLEAN_TYPE_P.  */
    6255                 :    60307150 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6256                 :             : }
    6257                 :             : 
    6258                 :             : /* Return true if the operation given by CODE can be truncated to N bits
    6259                 :             :    when only N bits of the output are needed.  This is only true if bit N+1
    6260                 :             :    of the inputs has no effect on the low N bits of the result.  */
    6261                 :             : 
    6262                 :             : static bool
    6263                 :    14724555 : vect_truncatable_operation_p (tree_code code)
    6264                 :             : {
    6265                 :    14724555 :   switch (code)
    6266                 :             :     {
    6267                 :             :     case NEGATE_EXPR:
    6268                 :             :     case PLUS_EXPR:
    6269                 :             :     case MINUS_EXPR:
    6270                 :             :     case MULT_EXPR:
    6271                 :             :     case BIT_NOT_EXPR:
    6272                 :             :     case BIT_AND_EXPR:
    6273                 :             :     case BIT_IOR_EXPR:
    6274                 :             :     case BIT_XOR_EXPR:
    6275                 :             :     case COND_EXPR:
    6276                 :             :       return true;
    6277                 :             :     /* Any code not listed above is rejected.  */
    6278                 :     5765854 :     default:
    6279                 :     5765854 :       return false;
    6280                 :             :     }
    6281                 :             : }
    6282                 :             : 
    6283                 :             : /* Record that STMT_INFO could be changed from operating on TYPE to
    6284                 :             :    operating on a type with the precision and sign given by PRECISION
    6285                 :             :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6286                 :             :    it might not be a whole number of bytes.  */
    6287                 :             : 
    6288                 :             : static void
    6289                 :     2129599 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6290                 :             :                          unsigned int precision, signop sign)
    6291                 :             : {
    6292                 :             :   /* Round the precision up to a whole number of bytes.  */
    6293                 :     2129599 :   precision = vect_element_precision (precision);
    6294                 :     2129599 :   if (precision < TYPE_PRECISION (type)
    6295                 :     2129599 :       && (!stmt_info->operation_precision
    6296                 :       40369 :           || stmt_info->operation_precision > precision))
    6297                 :             :     { /* Record only a precision below TYPE's, and only if none is recorded yet or it is smaller than the recorded one.  */
    6298                 :     1296789 :       stmt_info->operation_precision = precision;
    6299                 :     1296789 :       stmt_info->operation_sign = sign;
    6300                 :             :     }
    6301                 :     2129599 : }
    6302                 :             : 
    6303                 :             : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6304                 :             :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6305                 :             :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    6306                 :             : 
    6307                 :             : static void
    6308                 :    10662543 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6309                 :             :                               unsigned int min_input_precision)
    6310                 :             : {
    6311                 :             :   /* This operation in isolation only requires the inputs to have
    6312                 :             :      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
    6313                 :             :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6314                 :             :      as a whole.  E.g. consider something like:
    6315                 :             : 
    6316                 :             :          unsigned short *x, *y;
    6317                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6318                 :             : 
    6319                 :             :      The right shift can be done on unsigned chars, and only requires the
    6320                 :             :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6321                 :             :      approach would mean turning a natural chain of single-vector unsigned
    6322                 :             :      short operations into one that truncates "*x" and then extends
    6323                 :             :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6324                 :             :      operation and one vector for each unsigned char operation.
    6325                 :             :      This would be a significant pessimization.
    6326                 :             : 
    6327                 :             :      Instead only propagate the maximum of this precision and the precision
    6328                 :             :      required by the users of the result.  This means that we don't pessimize
    6329                 :             :      the case above but continue to optimize things like:
    6330                 :             : 
    6331                 :             :          unsigned char *y;
    6332                 :             :          unsigned short *x;
    6333                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6334                 :             : 
    6335                 :             :      Here we would truncate two vectors of *x to a single vector of
    6336                 :             :      unsigned chars and use single-vector unsigned char operations for
    6337                 :             :      everything else, rather than doing two unsigned short copies of
    6338                 :             :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6339                 :    10662543 :   min_input_precision = MAX (min_input_precision,
    6340                 :             :                              stmt_info->min_output_precision);
    6341                 :             : 
    6342                 :    10662543 :   if (min_input_precision < TYPE_PRECISION (type)
    6343                 :    10662543 :       && (!stmt_info->min_input_precision
    6344                 :       66914 :           || stmt_info->min_input_precision > min_input_precision))
    6345                 :      584264 :     stmt_info->min_input_precision = min_input_precision;
    6346                 :    10662543 : }
    6347                 :             : 
    6348                 :             : /* Subroutine of vect_determine_min_output_precision.  Return true if we can
    6349                 :             :    calculate a reduced number of output bits for STMT_INFO, whose result is
    6350                 :             :    LHS, storing that number in STMT_INFO->min_output_precision.  */
    6351                 :             : 
    6352                 :             : static bool
    6353                 :    14122183 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6354                 :             :                                        stmt_vec_info stmt_info, tree lhs)
    6355                 :             : {
    6356                 :             :   /* Take the maximum precision required by non-debug users of the result.  */
    6357                 :    14122183 :   unsigned int precision = 0;
    6358                 :    14122183 :   imm_use_iterator iter;
    6359                 :    14122183 :   use_operand_p use;
    6360                 :    15051407 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6361                 :             :     {
    6362                 :    14753915 :       gimple *use_stmt = USE_STMT (use);
    6363                 :    14753915 :       if (is_gimple_debug (use_stmt))
    6364                 :      621161 :         continue;
    6365                 :    14132754 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6366                 :    14132754 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6367                 :             :         return false;
    6368                 :             :       /* The input precision recorded for COND_EXPRs applies only to the "then"
    6369                 :             :          and "else" values.  NOTE(review): ASSIGN is STMT_INFO's stmt, not USE_STMT; confirm.  */
    6370                 :      308452 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6371                 :      264577 :       if (assign
    6372                 :      264577 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6373                 :         389 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6374                 :         389 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6375                 :             :         return false;
    6376                 :      931708 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6377                 :             :     }
    6378                 :             : 
    6379                 :      297492 :   if (dump_enabled_p ())
    6380                 :        6670 :     dump_printf_loc (MSG_NOTE, vect_location,
    6381                 :             :                      "only the low %d bits of %T are significant\n",
    6382                 :             :                      precision, lhs);
    6383                 :      297492 :   stmt_info->min_output_precision = precision;
    6384                 :      297492 :   return true;
    6385                 :             : }
    6386                 :             : 
    6387                 :             : /* Calculate min_output_precision for STMT_INFO; default to the result's full precision.  */
    6388                 :             : 
    6389                 :             : static void
    6390                 :    36939893 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6391                 :             : {
    6392                 :             :   /* We're only interested in statements with a narrowable result.  */
    6393                 :    36939893 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6394                 :    36939893 :   if (!lhs
    6395                 :    28990991 :       || TREE_CODE (lhs) != SSA_NAME
    6396                 :    61157096 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6397                 :             :     return;
    6398                 :             : 
    6399                 :    14122183 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6400                 :    13824691 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6401                 :             : }
    6402                 :             : 
    6403                 :             : /* Use range information to decide whether STMT (described by STMT_INFO)
    6404                 :             :    could be done in a narrower type.  This is effectively a forward
    6405                 :             :    propagation, since it uses context-independent information that applies
    6406                 :             :    to all users of an SSA name.  */
    6407                 :             : 
    6408                 :             : static void
    6409                 :    20286152 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6410                 :             : {
    6411                 :    20286152 :   tree lhs = gimple_assign_lhs (stmt);
    6412                 :    20286152 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6413                 :    18280373 :     return;
    6414                 :             : 
    6415                 :    15803795 :   tree type = TREE_TYPE (lhs);
    6416                 :    15803795 :   if (!vect_narrowable_type_p (type))
    6417                 :             :     return;
    6418                 :             : 
    6419                 :             :   /* First see whether we have any useful range information for the result.  */
    6420                 :    10683974 :   unsigned int precision = TYPE_PRECISION (type);
    6421                 :    10683974 :   signop sign = TYPE_SIGN (type);
    6422                 :    10683974 :   wide_int min_value, max_value;
    6423                 :    10683974 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6424                 :             :     return;
    6425                 :             : 
    6426                 :     5018105 :   tree_code code = gimple_assign_rhs_code (stmt);
    6427                 :     5018105 :   unsigned int nops = gimple_num_ops (stmt);
    6428                 :             : 
    6429                 :     5018105 :   if (!vect_truncatable_operation_p (code))
    6430                 :             :     {
    6431                 :             :       /* Handle operations that can be computed in type T if all inputs
    6432                 :             :          and outputs can be represented in type T.  Also handle left and
    6433                 :             :          right shifts, where (in addition) the maximum shift amount must
    6434                 :             :          be less than the number of bits in T.  */
    6435                 :     1932835 :       bool is_shift;
    6436                 :     1932835 :       switch (code)
    6437                 :             :         {
    6438                 :             :         case LSHIFT_EXPR:
    6439                 :             :         case RSHIFT_EXPR:
    6440                 :             :           is_shift = true;
    6441                 :             :           break;
    6442                 :             : 
    6443                 :      245854 :         case ABS_EXPR:
    6444                 :      245854 :         case MIN_EXPR:
    6445                 :      245854 :         case MAX_EXPR:
    6446                 :      245854 :         case TRUNC_DIV_EXPR:
    6447                 :      245854 :         case CEIL_DIV_EXPR:
    6448                 :      245854 :         case FLOOR_DIV_EXPR:
    6449                 :      245854 :         case ROUND_DIV_EXPR:
    6450                 :      245854 :         case EXACT_DIV_EXPR:
    6451                 :             :           /* Modulus is excluded because it is typically calculated by doing
    6452                 :             :              a division, for which minimum signed / -1 isn't representable in
    6453                 :             :              the original signed type.  We could take the division range into
    6454                 :             :              account instead, if handling modulus ever becomes important.  */
    6455                 :      245854 :           is_shift = false;
    6456                 :      245854 :           break;
    6457                 :             : 
    6458                 :             :         default:
    6459                 :             :           return;
    6460                 :             :         }
    6461                 :     1261814 :       for (unsigned int i = 1; i < nops; ++i)
    6462                 :             :         {
    6463                 :      968289 :           tree op = gimple_op (stmt, i);
    6464                 :      968289 :           wide_int op_min_value, op_max_value;
    6465                 :      968289 :           if (TREE_CODE (op) == INTEGER_CST)
    6466                 :             :             {
    6467                 :      297753 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6468                 :      297753 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6469                 :             :             }
    6470                 :      670536 :           else if (TREE_CODE (op) == SSA_NAME)
    6471                 :             :             {
    6472                 :      670536 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    6473                 :             :                 return;
    6474                 :             :             }
    6475                 :             :           else
    6476                 :             :             return;
    6477                 :             : 
    6478                 :      644757 :           if (is_shift && i == 2)
    6479                 :             :             {
    6480                 :             :               /* There needs to be one more bit than the maximum shift amount.
    6481                 :             : 
    6482                 :             :                  If the maximum shift amount is already 1 less than PRECISION
    6483                 :             :                  then we can't narrow the shift further.  Dealing with that
    6484                 :             :                  case first ensures that we can safely use an unsigned range
    6485                 :             :                  below.
    6486                 :             : 
    6487                 :             :                  op_min_value isn't relevant, since shifts by negative amounts
    6488                 :             :                  are UB.  */
    6489                 :      195962 :               if (wi::geu_p (op_max_value, precision - 1))
    6490                 :             :                 return;
    6491                 :      179114 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    6492                 :             : 
    6493                 :             :               /* As explained below, we can convert a signed shift into an
    6494                 :             :                  unsigned shift if the sign bit is always clear.  At this
    6495                 :             :                  point we've already processed the ranges of the output and
    6496                 :             :                  the first input.  */
    6497                 :      179114 :               auto op_sign = sign;
    6498                 :      179114 :               if (sign == SIGNED && !wi::neg_p (min_value))
    6499                 :             :                 op_sign = UNSIGNED;
    6500                 :      358228 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    6501                 :      179114 :                                              precision, op_sign);
    6502                 :      358228 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    6503                 :      179114 :                                              precision, op_sign);
    6504                 :             :             }
    6505                 :      627909 :           min_value = wi::min (min_value, op_min_value, sign);
    6506                 :      627909 :           max_value = wi::max (max_value, op_max_value, sign);
    6507                 :      968289 :         }
    6508                 :             :     }
    6509                 :             : 
    6510                 :             :   /* Try to switch signed types for unsigned types if we can.
    6511                 :             :      This is better for two reasons.  First, unsigned ops tend
    6512                 :             :      to be cheaper than signed ops.  Second, it means that we can
    6513                 :             :      handle things like:
    6514                 :             : 
    6515                 :             :         signed char c;
    6516                 :             :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    6517                 :             : 
    6518                 :             :      as:
    6519                 :             : 
    6520                 :             :         signed char c;
    6521                 :             :         unsigned short res_1 = (unsigned short) c & 0xff00;
    6522                 :             :         int res = (int) res_1;
    6523                 :             : 
    6524                 :             :      where the intermediate result res_1 has unsigned rather than
    6525                 :             :      signed type.  */
    6526                 :     3378795 :   if (sign == SIGNED && !wi::neg_p (min_value))
    6527                 :             :     sign = UNSIGNED;
    6528                 :             : 
    6529                 :             :   /* See what precision MIN_VALUE and MAX_VALUE need; give up unless it is narrower than TYPE.  */
    6530                 :     3378795 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    6531                 :     3378795 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    6532                 :     3378795 :   unsigned int value_precision = MAX (precision1, precision2);
    6533                 :     3378795 :   if (value_precision >= precision)
    6534                 :             :     return;
    6535                 :             : 
    6536                 :     2005779 :   if (dump_enabled_p ())
    6537                 :       78997 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6538                 :             :                      " without loss of precision: %G",
    6539                 :             :                      sign == SIGNED ? "signed" : "unsigned",
    6540                 :             :                      value_precision, (gimple *) stmt);
    6541                 :             : 
    6542                 :     2005779 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    6543                 :     2005779 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    6544                 :    10683974 : }
    6545                 :             : 
    6546                 :             : /* Use information about the users of STMT's result to decide whether
    6547                 :             :    STMT (described by STMT_INFO) could be done in a narrower type.
    6548                 :             :    This is effectively a backward propagation.  */
    6549                 :             : 
    6550                 :             : static void
    6551                 :    20286152 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    6552                 :             : {
    6553                 :    20286152 :   tree_code code = gimple_assign_rhs_code (stmt);
    6554                 :    20286152 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    6555                 :    20286152 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    6556                 :    20286152 :   if (!vect_narrowable_type_p (type))
    6557                 :    11629388 :     return;
    6558                 :             : 
    6559                 :    12523539 :   unsigned int precision = TYPE_PRECISION (type);
    6560                 :    12523539 :   unsigned int operation_precision, min_input_precision;
    6561                 :    12523539 :   switch (code)
    6562                 :             :     {
    6563                 :     2318557 :     CASE_CONVERT:
    6564                 :             :       /* Only the bits that contribute to the output matter.  Don't change
    6565                 :             :          the precision of the operation itself.  */
    6566                 :     2318557 :       operation_precision = precision;
    6567                 :     2318557 :       min_input_precision = stmt_info->min_output_precision;
    6568                 :     2318557 :       break;
    6569                 :             : 
    6570                 :      498532 :     case LSHIFT_EXPR:
    6571                 :      498532 :     case RSHIFT_EXPR:
    6572                 :      498532 :       {
    6573                 :      498532 :         tree shift = gimple_assign_rhs2 (stmt);
    6574                 :      498532 :         unsigned int min_const_shift, max_const_shift;
    6575                 :      498532 :         wide_int min_shift, max_shift;
    6576                 :      498532 :         if (TREE_CODE (shift) == SSA_NAME
    6577                 :       96908 :             && vect_get_range_info (shift, &min_shift, &max_shift)
    6578                 :       73157 :             && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
    6579                 :      568974 :             && wi::lt_p (max_shift, TYPE_PRECISION (type),
    6580                 :       70442 :                          TYPE_SIGN (TREE_TYPE (shift))))
    6581                 :             :           {
    6582                 :       63260 :             min_const_shift = min_shift.to_uhwi ();
    6583                 :       63260 :             max_const_shift = max_shift.to_uhwi ();
    6584                 :             :           }
    6585                 :      435272 :         else if (TREE_CODE (shift) == INTEGER_CST
    6586                 :      836896 :                  && wi::ltu_p (wi::to_widest (shift), precision))
    6587                 :      401516 :           min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
    6588                 :             :         else
    6589                 :       33756 :           return;
    6590                 :      464776 :         if (code == LSHIFT_EXPR)
    6591                 :             :           {
    6592                 :             :             /* Avoid creating an undefined shift.
    6593                 :             : 
    6594                 :             :                ??? We could instead use min_output_precision as-is and
    6595                 :             :                optimize out-of-range shifts to zero.  However, only
    6596                 :             :                degenerate testcases shift away all their useful input data,
    6597                 :             :                and it isn't natural to drop input operations in the middle
    6598                 :             :                of vectorization.  This sort of thing should really be
    6599                 :             :                handled before vectorization.  */
    6600                 :      143445 :             operation_precision = MAX (stmt_info->min_output_precision,
    6601                 :             :                                        max_const_shift + 1);
    6602                 :             :             /* We need MIN_CONST_SHIFT fewer bits of the input.  */
    6603                 :      143445 :             min_input_precision = (MAX (operation_precision, max_const_shift)
    6604                 :             :                                    - min_const_shift);
    6605                 :             :           }
    6606                 :             :         else
    6607                 :             :           {
    6608                 :             :             /* We need MAX_CONST_SHIFT extra bits to do the operation.  */
    6609                 :      321331 :             operation_precision = (stmt_info->min_output_precision
    6610                 :             :                                    + max_const_shift);
    6611                 :      321331 :             min_input_precision = operation_precision;
    6612                 :             :           }
    6613                 :      464776 :         break;
    6614                 :      498532 :       }
    6615                 :             : 
    6616                 :     9706450 :     default:
    6617                 :     9706450 :       if (vect_truncatable_operation_p (code))
    6618                 :             :         {
    6619                 :             :           /* Input bit N has no effect on output bits N-1 and lower.  */
    6620                 :     5873431 :           operation_precision = stmt_info->min_output_precision;
    6621                 :     5873431 :           min_input_precision = operation_precision;
    6622                 :     5873431 :           break;
    6623                 :             :         }
    6624                 :             :       return;
    6625                 :             :     }
    6626                 :             : 
    6627                 :     8656764 :   if (operation_precision < precision)
    6628                 :             :     {
    6629                 :      123820 :       if (dump_enabled_p ())
    6630                 :        3385 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6631                 :             :                          " without affecting users: %G",
    6632                 :        3385 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    6633                 :             :                          operation_precision, (gimple *) stmt);
    6634                 :      247640 :       vect_set_operation_type (stmt_info, type, operation_precision,
    6635                 :      123820 :                                TYPE_SIGN (type));
    6636                 :             :     }
    6637                 :     8656764 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    6638                 :             : }
    6639                 :             : 
    6640                 :             : /* Return true if the statement described by STMT_INFO sets a boolean SSA_NAME
    6641                 :             :    (or is a gcond whose lhs operand is one) and if we know how to vectorize
    6642                 :             :    this kind of statement using vector mask types.  */
    6643                 :             : 
    6644                 :             : static bool
    6645                 :    36939893 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    6646                 :             : {
    6647                 :    36939893 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6648                 :    36939893 :   tree_code code = ERROR_MARK;
    6649                 :    36939893 :   gassign *assign = NULL;
    6650                 :    36939893 :   gcond *cond = NULL;
    6651                 :             : 
    6652                 :    36939893 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    6653                 :    20286152 :     code = gimple_assign_rhs_code (assign);
    6654                 :    16653741 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    6655                 :             :     {
    6656                 :     4954678 :       lhs = gimple_cond_lhs (cond);
    6657                 :     4954678 :       code = gimple_cond_code (cond);
    6658                 :             :     }
    6659                 :             : 
    6660                 :    36939893 :   if (!lhs
    6661                 :    33945669 :       || TREE_CODE (lhs) != SSA_NAME
    6662                 :    66111544 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6663                 :             :     return false;
    6664                 :             : 
    6665                 :     2106290 :   if (code != ERROR_MARK)
    6666                 :             :     {
    6667                 :     1797109 :       switch (code)
    6668                 :             :         {
    6669                 :             :         CASE_CONVERT:
    6670                 :             :         case SSA_NAME:
    6671                 :             :         case BIT_NOT_EXPR:
    6672                 :             :         case BIT_IOR_EXPR:
    6673                 :             :         case BIT_XOR_EXPR:
    6674                 :             :         case BIT_AND_EXPR:
    6675                 :             :           return true;
    6676                 :             : 
    6677                 :     1403758 :         default:
    6678                 :     1403758 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    6679                 :             :         }
    6680                 :             :     }
    6681                 :      309181 :   else if (is_a <gphi *> (stmt_info->stmt))
    6682                 :      199370 :     return true;
    6683                 :             :   return false;
    6684                 :             : }
    6685                 :             : 
    6686                 :             : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    6687                 :             :    a vector mask type instead of a normal vector type.  Record the
    6688                 :             :    result in STMT_INFO->mask_precision.  */
    6689                 :             : 
    6690                 :             : static void
    6691                 :    36939893 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6692                 :             : {
    6693                 :    36939893 :   if (!possible_vector_mask_operation_p (stmt_info))
    6694                 :             :     return;
    6695                 :             : 
    6696                 :             :   /* If at least one boolean input uses a vector mask type,
    6697                 :             :      pick the mask type with the narrowest elements.
    6698                 :             : 
    6699                 :             :      ??? This is the traditional behavior.  It should always produce
    6700                 :             :      the smallest number of operations, but isn't necessarily the
    6701                 :             :      optimal choice.  For example, if we have:
    6702                 :             : 
    6703                 :             :        a = b & c
    6704                 :             : 
    6705                 :             :      where:
    6706                 :             : 
    6707                 :             :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    6708                 :             :        - b also uses M16
    6709                 :             :        - c uses a mask type for 8-bit elements (M8)
    6710                 :             : 
    6711                 :             :      then picking M8 gives:
    6712                 :             : 
    6713                 :             :        - 1 M16->M8 pack for b
    6714                 :             :        - 1 M8 AND for a
    6715                 :             :        - 2 M8->M16 unpacks for the user of a
    6716                 :             : 
    6717                 :             :      whereas picking M16 would have given:
    6718                 :             : 
    6719                 :             :        - 2 M8->M16 unpacks for c
    6720                 :             :        - 2 M16 ANDs for a
    6721                 :             : 
    6722                 :             :      The number of operations is equal, but M16 would have given
    6723                 :             :      a shorter dependency chain and allowed more ILP.  */
    6724                 :     1956069 :   unsigned int precision = ~0U;
    6725                 :     1956069 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    6726                 :             : 
    6727                 :             :   /* If the statement compares two values that shouldn't use vector masks,
    6728                 :             :      try comparing the values as normal scalars instead.  */
    6729                 :     1956069 :   tree_code code = ERROR_MARK;
    6730                 :     1956069 :   tree op0_type;
    6731                 :     1956069 :   unsigned int nops = -1;
    6732                 :     1956069 :   unsigned int ops_start = 0;
    6733                 :             : 
    6734                 :     1956069 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    6735                 :             :     {
    6736                 :     1203434 :       code = gimple_assign_rhs_code (assign);
    6737                 :     1203434 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    6738                 :     1203434 :       nops = gimple_num_ops (assign);
    6739                 :     1203434 :       ops_start = 1;
    6740                 :             :     }
    6741                 :      752635 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    6742                 :             :     {
    6743                 :      553265 :       code = gimple_cond_code (cond);
    6744                 :      553265 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    6745                 :      553265 :       nops = 2;
    6746                 :      553265 :       ops_start = 0;
    6747                 :             :     }
    6748                 :             : 
    6749                 :     1756699 :   if (code != ERROR_MARK)
    6750                 :             :     {
    6751                 :     5230515 :       for (unsigned int i = ops_start; i < nops; ++i)
    6752                 :             :         {
    6753                 :     3473816 :           tree rhs = gimple_op (stmt, i);
    6754                 :     3473816 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    6755                 :     1630973 :             continue;
    6756                 :             : 
    6757                 :     1842843 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6758                 :     1842843 :           if (!def_stmt_info)
    6759                 :             :             /* Don't let external or constant operands influence the choice.
    6760                 :             :                We can convert them to whichever vector type we pick.  */
    6761                 :      566734 :             continue;
    6762                 :             : 
    6763                 :     1276109 :           if (def_stmt_info->mask_precision)
    6764                 :             :             {
    6765                 :     1064042 :               if (precision > def_stmt_info->mask_precision)
    6766                 :     3473816 :                 precision = def_stmt_info->mask_precision;
    6767                 :             :             }
    6768                 :             :         }
    6769                 :             : 
    6770                 :     1756699 :       if (precision == ~0U
    6771                 :     1413131 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    6772                 :             :         {
    6773                 :     1214106 :           scalar_mode mode;
    6774                 :     1214106 :           tree vectype, mask_type;
    6775                 :     1214106 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    6776                 :     1214106 :               && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
    6777                 :     1050229 :               && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
    6778                 :     1050229 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    6779                 :     1614564 :             precision = GET_MODE_BITSIZE (mode);
    6780                 :             :         }
    6781                 :             :     }
    6782                 :             :   else
    6783                 :             :     {
    6784                 :      199370 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    6785                 :      715167 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    6786                 :             :         {
    6787                 :      515797 :           tree rhs = gimple_phi_arg_def (phi, i);
    6788                 :             : 
    6789                 :      515797 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6790                 :      515797 :           if (!def_stmt_info)
    6791                 :             :             /* Don't let external or constant operands influence the choice.
    6792                 :             :                We can convert them to whichever vector type we pick.  */
    6793                 :      304918 :             continue;
    6794                 :             : 
    6795                 :      210879 :           if (def_stmt_info->mask_precision)
    6796                 :             :             {
    6797                 :      185770 :               if (precision > def_stmt_info->mask_precision)
    6798                 :      515797 :                 precision = def_stmt_info->mask_precision;
    6799                 :             :             }
    6800                 :             :         }
    6801                 :             :     }
    6802                 :             : 
    6803                 :     1956069 :   if (dump_enabled_p ())
    6804                 :             :     {
    6805                 :        7235 :       if (precision == ~0U)
    6806                 :        1676 :         dump_printf_loc (MSG_NOTE, vect_location,
    6807                 :             :                          "using normal nonmask vectors for %G",
    6808                 :             :                          stmt_info->stmt);
    6809                 :             :       else
    6810                 :        5559 :         dump_printf_loc (MSG_NOTE, vect_location,
    6811                 :             :                          "using boolean precision %d for %G",
    6812                 :             :                          precision, stmt_info->stmt);
    6813                 :             :     }
    6814                 :             : 
    6815                 :     1956069 :   stmt_info->mask_precision = precision;
    6816                 :             : }
    6817                 :             : 
    6818                 :             : /* Handle vect_determine_precisions for STMT_INFO, given that we
    6819                 :             :    have already done so for the users of its result.  */
    6820                 :             : 
    6821                 :             : void
    6822                 :    36939893 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    6823                 :             : {
    6824                 :    36939893 :   vect_determine_min_output_precision (vinfo, stmt_info);
    6825                 :    36939893 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    6826                 :             :     {
    6827                 :    20286152 :       vect_determine_precisions_from_range (stmt_info, stmt);
    6828                 :    20286152 :       vect_determine_precisions_from_users (stmt_info, stmt);
    6829                 :             :     }
    6830                 :    36939893 : }
    6831                 :             : 
    6832                 :             : /* Walk backwards through the vectorizable region to determine the
    6833                 :             :    values of these fields:
    6834                 :             : 
    6835                 :             :    - min_output_precision
    6836                 :             :    - min_input_precision
    6837                 :             :    - operation_precision
    6838                 :             :    - operation_sign.  */
    6839                 :             : 
    6840                 :             : void
    6841                 :      955241 : vect_determine_precisions (vec_info *vinfo)
    6842                 :             : {
    6843                 :      955241 :   basic_block *bbs = vinfo->bbs;
    6844                 :      955241 :   unsigned int nbbs = vinfo->nbbs;
    6845                 :             : 
    6846                 :      955241 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    6847                 :             : 
    6848                 :    12588507 :   for (unsigned int i = 0; i < nbbs; i++)
    6849                 :             :     {
    6850                 :    11633266 :       basic_block bb = bbs[i];
    6851                 :    19044710 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6852                 :             :         {
    6853                 :     7411444 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6854                 :     7411444 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6855                 :     7224089 :             vect_determine_mask_precision (vinfo, stmt_info);
    6856                 :             :         }
    6857                 :   112249449 :       for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6858                 :             :         {
    6859                 :    88982917 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6860                 :    88982917 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6861                 :    29715804 :             vect_determine_mask_precision (vinfo, stmt_info);
    6862                 :             :         }
    6863                 :             :     }
    6864                 :    12588507 :   for (unsigned int i = 0; i < nbbs; i++)
    6865                 :             :     {
    6866                 :    11633266 :       basic_block bb = bbs[nbbs - i - 1];
    6867                 :   201232366 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    6868                 :             :         {
    6869                 :    88982917 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6870                 :    88982917 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6871                 :    29715804 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    6872                 :             :         }
    6873                 :    19044710 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6874                 :             :         {
    6875                 :     7411444 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6876                 :     7411444 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6877                 :     7224089 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    6878                 :             :         }
    6879                 :             :     }
    6880                 :      955241 : }
    6881                 :             : 
    6882                 :             : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    6883                 :             : 
    6884                 :             : struct vect_recog_func
    6885                 :             : {
    6886                 :             :   vect_recog_func_ptr fn;
    6887                 :             :   const char *name;
    6888                 :             : };
    6889                 :             : 
    6890                 :             : /* Note that ordering matters - the first pattern matching on a stmt is
    6891                 :             :    taken which means usually the more complex one needs to precede the
    6892                 :             :    less complex ones (widen_sum only after dot_prod or sad for example).  */
    6893                 :             : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    6894                 :             :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    6895                 :             :   { vect_recog_bit_insert_pattern, "bit_insert" },
    6896                 :             :   { vect_recog_abd_pattern, "abd" },
    6897                 :             :   { vect_recog_over_widening_pattern, "over_widening" },
    6898                 :             :   /* Must come after over_widening, which narrows the shift as much as
    6899                 :             :      possible beforehand.  */
    6900                 :             :   { vect_recog_average_pattern, "average" },
    6901                 :             :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    6902                 :             :   { vect_recog_mulhs_pattern, "mult_high" },
    6903                 :             :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    6904                 :             :   { vect_recog_widen_mult_pattern, "widen_mult" },
    6905                 :             :   { vect_recog_dot_prod_pattern, "dot_prod" },
    6906                 :             :   { vect_recog_sad_pattern, "sad" },
    6907                 :             :   { vect_recog_widen_sum_pattern, "widen_sum" },
    6908                 :             :   { vect_recog_pow_pattern, "pow" },
    6909                 :             :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    6910                 :             :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    6911                 :             :   { vect_recog_widen_shift_pattern, "widen_shift" },
    6912                 :             :   { vect_recog_rotate_pattern, "rotate" },
    6913                 :             :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    6914                 :             :   { vect_recog_divmod_pattern, "divmod" },
    6915                 :             :   { vect_recog_mod_var_pattern, "modvar" },
    6916                 :             :   { vect_recog_mult_pattern, "mult" },
    6917                 :             :   { vect_recog_sat_add_pattern, "sat_add" },
    6918                 :             :   { vect_recog_sat_sub_pattern, "sat_sub" },
    6919                 :             :   { vect_recog_sat_trunc_pattern, "sat_trunc" },
    6920                 :             :   { vect_recog_gcond_pattern, "gcond" },
    6921                 :             :   { vect_recog_bool_pattern, "bool" },
    6922                 :             :   /* This must come before mask conversion, and includes the parts
    6923                 :             :      of mask conversion that are needed for gather and scatter
    6924                 :             :      internal functions.  */
    6925                 :             :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    6926                 :             :   { vect_recog_cond_store_pattern, "cond_store" },
    6927                 :             :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    6928                 :             :   { vect_recog_widen_plus_pattern, "widen_plus" },
    6929                 :             :   { vect_recog_widen_minus_pattern, "widen_minus" },
    6930                 :             :   { vect_recog_widen_abd_pattern, "widen_abd" },
    6931                 :             :   /* These must come after the double widening ones.  */
    6932                 :             : };
    6933                 :             : 
    6934                 :             : /* Mark statements that are involved in a pattern.  */
    6935                 :             : 
    6936                 :             : void
    6937                 :      826665 : vect_mark_pattern_stmts (vec_info *vinfo,
    6938                 :             :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    6939                 :             :                          tree pattern_vectype)
    6940                 :             : {
    6941                 :      826665 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    6942                 :      826665 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    6943                 :             : 
    6944                 :      826665 :   gimple *orig_pattern_stmt = NULL;
    6945                 :      826665 :   if (is_pattern_stmt_p (orig_stmt_info))
    6946                 :             :     {
    6947                 :             :       /* We're replacing a statement in an existing pattern definition
    6948                 :             :          sequence.  */
    6949                 :       12616 :       orig_pattern_stmt = orig_stmt_info->stmt;
    6950                 :       12616 :       if (dump_enabled_p ())
    6951                 :         679 :         dump_printf_loc (MSG_NOTE, vect_location,
    6952                 :             :                          "replacing earlier pattern %G", orig_pattern_stmt);
    6953                 :             : 
    6954                 :             :       /* To keep the book-keeping simple, just swap the lhs of the
    6955                 :             :          old and new statements, so that the old one has a valid but
    6956                 :             :          unused lhs.  */
    6957                 :       12616 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    6958                 :       12616 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    6959                 :       12616 :       gimple_set_lhs (pattern_stmt, old_lhs);
    6960                 :             : 
    6961                 :       12616 :       if (dump_enabled_p ())
    6962                 :         679 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    6963                 :             : 
    6964                 :             :       /* Switch to the statement that ORIG replaces.  */
    6965                 :       12616 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    6966                 :             : 
    6967                 :             :       /* We shouldn't be replacing the main pattern statement.  */
    6968                 :       12616 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    6969                 :             :                   != orig_pattern_stmt);
    6970                 :             :     }
    6971                 :             : 
    6972                 :      826665 :   if (def_seq)
    6973                 :             :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    6974                 :     1825602 :          !gsi_end_p (si); gsi_next (&si))
    6975                 :             :       {
    6976                 :     1116821 :         if (dump_enabled_p ())
    6977                 :       23592 :           dump_printf_loc (MSG_NOTE, vect_location,
    6978                 :             :                            "extra pattern stmt: %G", gsi_stmt (si));
    6979                 :     1116821 :         stmt_vec_info pattern_stmt_info
    6980                 :     1116821 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    6981                 :             :                                     orig_stmt_info, pattern_vectype);
    6982                 :             :         /* Stmts in the def sequence are not vectorizable cycle or
    6983                 :             :            induction defs, instead they should all be vect_internal_def
    6984                 :             :            feeding the main pattern stmt which retains this def type.  */
    6985                 :     1116821 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    6986                 :             :       }
    6987                 :             : 
    6988                 :      826665 :   if (orig_pattern_stmt)
    6989                 :             :     {
    6990                 :       12616 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    6991                 :             :                               orig_stmt_info, pattern_vectype);
    6992                 :             : 
    6993                 :             :       /* Insert all the new pattern statements before the original one.  */
    6994                 :       12616 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    6995                 :       12616 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    6996                 :             :                                                orig_def_seq);
    6997                 :       12616 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    6998                 :       12616 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    6999                 :             : 
    7000                 :             :       /* Remove the pattern statement that this new pattern replaces.  */
    7001                 :       12616 :       gsi_remove (&gsi, false);
    7002                 :             :     }
    7003                 :             :   else
    7004                 :      814049 :     vect_set_pattern_stmt (vinfo,
    7005                 :             :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    7006                 :             : 
    7007                 :             :   /* For any conditionals mark them as vect_condition_def.  */
    7008                 :      826665 :   if (is_a <gcond *> (pattern_stmt))
    7009                 :      264291 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    7010                 :             : 
    7011                 :             :   /* Transfer reduction path info to the pattern.  */
    7012                 :      826665 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    7013                 :             :     {
    7014                 :        9072 :       gimple_match_op op;
    7015                 :        9072 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    7016                 :           0 :         gcc_unreachable ();
    7017                 :        9072 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    7018                 :             :       /* Search the pattern def sequence and the main pattern stmt.  Note
    7019                 :             :          we may have inserted all into a containing pattern def sequence
    7020                 :             :          so the following is a bit awkward.  */
    7021                 :        9072 :       gimple_stmt_iterator si;
    7022                 :        9072 :       gimple *s;
    7023                 :        9072 :       if (def_seq)
    7024                 :             :         {
    7025                 :        8418 :           si = gsi_start (def_seq);
    7026                 :        8418 :           s = gsi_stmt (si);
    7027                 :        8418 :           gsi_next (&si);
    7028                 :             :         }
    7029                 :             :       else
    7030                 :             :         {
    7031                 :             :           si = gsi_none ();
    7032                 :             :           s = pattern_stmt;
    7033                 :             :         }
    7034                 :       17605 :       do
    7035                 :             :         {
    7036                 :       17605 :           bool found = false;
    7037                 :       17605 :           if (gimple_extract_op (s, &op))
    7038                 :       42240 :             for (unsigned i = 0; i < op.num_ops; ++i)
    7039                 :       33868 :               if (op.ops[i] == lookfor)
    7040                 :             :                 {
    7041                 :        9233 :                   STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7042                 :        9233 :                   lookfor = gimple_get_lhs (s);
    7043                 :        9233 :                   found = true;
    7044                 :        9233 :                   break;
    7045                 :             :                 }
    7046                 :       17605 :           if (s == pattern_stmt)
    7047                 :             :             {
    7048                 :        9072 :               if (!found && dump_enabled_p ())
    7049                 :           0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7050                 :             :                                  "failed to update reduction index.\n");
    7051                 :        9072 :               break;
    7052                 :             :             }
    7053                 :        8533 :           if (gsi_end_p (si))
    7054                 :             :             s = pattern_stmt;
    7055                 :             :           else
    7056                 :             :             {
    7057                 :         115 :               s = gsi_stmt (si);
    7058                 :         115 :               if (s == pattern_stmt)
    7059                 :             :                 /* Found the end inside a bigger pattern def seq.  */
    7060                 :             :                 si = gsi_none ();
    7061                 :             :               else
    7062                 :         115 :                 gsi_next (&si);
    7063                 :             :             }
    7064                 :             :         } while (1);
    7065                 :             :     }
    7066                 :      826665 : }
    7067                 :             : 
    7068                 :             : /* Function vect_pattern_recog_1
    7069                 :             : 
    7070                 :             :    Input:
    7071                 :             :    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
    7072                 :             :         computation pattern.
    7073                 :             :    STMT_INFO: A stmt from which the pattern search should start.
    7074                 :             : 
    7075                 :             :    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
    7076                 :             :    a sequence of statements that has the same functionality and can be
    7077                 :             :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7078                 :             :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7079                 :             :    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
    7080                 :             :    statement, having first checked that the target supports the new operation
    7081                 :             :    in that type.
    7082                 :             : 
    7083                 :             :    This function also does some bookkeeping, as explained in the documentation
    7084                 :             :    for vect_pattern_recog.  */
    7085                 :             : 
    7086                 :             : static void
    7087                 :   967639477 : vect_pattern_recog_1 (vec_info *vinfo,
    7088                 :             :                       const vect_recog_func &recog_func, stmt_vec_info stmt_info)
    7089                 :             : {
    7090                 :   967639477 :   gimple *pattern_stmt;
    7091                 :   967639477 :   tree pattern_vectype;
    7092                 :             : 
    7093                 :             :   /* If this statement has already been replaced with pattern statements,
    7094                 :             :      leave the original statement alone, since the first match wins.
    7095                 :             :      Instead try to match against the definition statements that feed
    7096                 :             :      the main pattern statement.  */
    7097                 :   967639477 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7098                 :             :     {
    7099                 :    11535676 :       gimple_stmt_iterator gsi;
    7100                 :    11535676 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7101                 :    28269425 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7102                 :    16733749 :         vect_pattern_recog_1 (vinfo, recog_func,
    7103                 :             :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7104                 :             :       return;
    7105                 :             :     }
    7106                 :             : 
    7107                 :   956103801 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7108                 :   956103801 :   pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
    7109                 :   956103801 :   if (!pattern_stmt)
    7110                 :             :     {
    7111                 :             :       /* Clear any half-formed pattern definition sequence.  */
    7112                 :   955277136 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7113                 :   955277136 :       return;
    7114                 :             :     }
    7115                 :             : 
    7116                 :             :   /* Found a vectorizable pattern.  */
    7117                 :      826665 :   if (dump_enabled_p ())
    7118                 :       18798 :     dump_printf_loc (MSG_NOTE, vect_location,
    7119                 :             :                      "%s pattern recognized: %G",
    7120                 :       18798 :                      recog_func.name, pattern_stmt);
    7121                 :             : 
    7122                 :             :   /* Mark the stmts that are involved in the pattern. */
    7123                 :      826665 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7124                 :             : }
    7125                 :             : 
    7126                 :             : 
    7127                 :             : /* Function vect_pattern_recog
    7128                 :             : 
    7129                 :             :    Input:
    7130                 :             :    VINFO - the vec_info describing the region (the basic blocks in
    7131                 :             :         VINFO->bbs) in which we want to look for computation idioms.
    7132                 :             : 
    7133                 :             :    Output - for each computation idiom that is detected we create a new stmt
    7134                 :             :         that provides the same functionality and that can be vectorized.  We
    7135                 :             :         also record some information in the struct_stmt_info of the relevant
    7136                 :             :         stmts, as explained below:
    7137                 :             : 
    7138                 :             :    At the entry to this function we have the following stmts, with the
    7139                 :             :    following initial value in the STMT_VINFO fields:
    7140                 :             : 
    7141                 :             :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7142                 :             :          S1: a_i = ....                 -       -               -
    7143                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7144                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7145                 :             :          S4: a_0 = ..use(a_1)..         -       -               -
    7146                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7147                 :             : 
    7148                 :             :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7149                 :             :    represented by a single stmt.  We then:
    7150                 :             :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7151                 :             :      inserted into the code)
    7152                 :             :    - fill in the STMT_VINFO fields as follows:
    7153                 :             : 
    7154                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7155                 :             :          S1: a_i = ....                 -       -               -
    7156                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7157                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7158                 :             :          S4: a_0 = ..use(a_1)..         true    S6              -
    7159                 :             :           '---> S6: a_new = ....        -       S4              -
    7160                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7161                 :             : 
    7162                 :             :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7163                 :             :    to each other through the RELATED_STMT field).
    7164                 :             : 
    7165                 :             :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7166                 :             :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7167                 :             :    remain irrelevant unless used by stmts other than S4.
    7168                 :             : 
    7169                 :             :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7170                 :             :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7171                 :             :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7172                 :             :    S4 will be skipped, and S5 will be vectorized as usual:
    7173                 :             : 
    7174                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7175                 :             :          S1: a_i = ....                 -       -               -
    7176                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7177                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7178                 :             :        > VS6: va_new = ....             -       -               -
    7179                 :             :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7180                 :             :           '---> S6: a_new = ....        -       S4              VS6
    7181                 :             :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7182                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7183                 :             : 
    7184                 :             :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7185                 :             :    elsewhere), and we'll end up with:
    7186                 :             : 
    7187                 :             :         VS6: va_new = ....
    7188                 :             :         VS5: ... = ..vuse(va_new)..
    7189                 :             : 
    7190                 :             :    In case of more than one pattern statement, e.g., widen-mult with
    7191                 :             :    intermediate type:
    7192                 :             : 
    7193                 :             :      S1  a_t = ;
    7194                 :             :      S2  a_T = (TYPE) a_t;
    7195                 :             :            '--> S3: a_it = (interm_type) a_t;
    7196                 :             :      S4  prod_T = a_T * CONST;
    7197                 :             :            '--> S5: prod_T' = a_it w* CONST;
    7198                 :             : 
    7199                 :             :    there may be other users of a_T outside the pattern.  In that case S2 will
    7200                 :             :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7201                 :             :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7202                 :             :    be recorded in S3.  */
    7203                 :             : 
    7204                 :             : void
    7205                 :      955241 : vect_pattern_recog (vec_info *vinfo)
    7206                 :             : {
    7207                 :      955241 :   basic_block *bbs = vinfo->bbs;
    7208                 :      955241 :   unsigned int nbbs = vinfo->nbbs;
    7209                 :             : 
    7210                 :      955241 :   vect_determine_precisions (vinfo);  /* NOTE(review): presumably a prerequisite for the recognizers below; confirm.  */
    7211                 :             : 
    7212                 :      955241 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7213                 :             : 
    7214                 :             :   /* Walk every stmt of each of the NBBS basic blocks in BBS, in order,
    7215                 :             :      applying the pattern recognition functions starting at each stmt visited.  */
    7216                 :    12588507 :   for (unsigned i = 0; i < nbbs; i++)
    7217                 :             :     {
    7218                 :    11633266 :       basic_block bb = bbs[i];
    7219                 :             : 
    7220                 :   112249449 :       for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7221                 :             :         {
    7222                 :    88982917 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7223                 :             : 
    7224                 :    88982917 :           if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
    7225                 :    59267113 :             continue;  /* No stmt_vec_info for this stmt, or it is not vectorizable.  */
    7226                 :             : 
    7227                 :             :           /* Try every entry of vect_vect_recog_func_ptrs on this stmt; the loop does not stop at the first match.  */
    7228                 :   980621532 :           for (const auto &func_ptr : vect_vect_recog_func_ptrs)
    7229                 :   950905728 :             vect_pattern_recog_1 (vinfo, func_ptr,
    7230                 :             :                                   stmt_info);
    7231                 :             :         }
    7232                 :             :     }
    7233                 :             : 
    7234                 :             :   /* Set the stmt_vec_info_ro flag: after this no more add_stmt calls are allowed.  */
    7235                 :      955241 :   vinfo->stmt_vec_info_ro = true;
    7236                 :      955241 : }
    7237                 :             : 
    7238                 :             : /* Build and return a GIMPLE_ASSIGN (when CH is a tree_code) or a GIMPLE_CALL
    7239                 :             :    (when CH is an internal_fn) that applies CH to OP0 and OP1 and has LHS as its result.  */
    7240                 :             : gimple *
    7241                 :      135219 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7242                 :             : {
    7243                 :      135219 :   gcc_assert (op0 != NULL_TREE);  /* OP0 is mandatory; only OP1 is tested for NULL_TREE below.  */
    7244                 :      135219 :   if (ch.is_tree_code ())
    7245                 :      135219 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7246                 :             : 
    7247                 :           0 :   gcc_assert (ch.is_internal_fn ());  /* CH must be either a tree_code or an internal_fn.  */
    7248                 :           0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7249                 :             :                                              op1 == NULL_TREE ? 1 : 2,  /* 1 when OP1 is NULL_TREE, otherwise 2.  */
    7250                 :             :                                              op0, op1);
    7251                 :           0 :   gimple_call_set_lhs (stmt, lhs);
    7252                 :           0 :   return stmt;
    7253                 :             : }
        

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.