LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.2 % 2818 2515
Test Date: 2024-12-21 13:15:12 Functions: 92.9 % 84 78
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* Analysis Utilities for Loop Vectorization.
       2                 :             :    Copyright (C) 2006-2024 Free Software Foundation, Inc.
       3                 :             :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.  */
      20                 :             : 
      21                 :             : #include "config.h"
      22                 :             : #include "system.h"
      23                 :             : #include "coretypes.h"
      24                 :             : #include "backend.h"
      25                 :             : #include "rtl.h"
      26                 :             : #include "tree.h"
      27                 :             : #include "gimple.h"
      28                 :             : #include "gimple-iterator.h"
      29                 :             : #include "gimple-fold.h"
      30                 :             : #include "ssa.h"
      31                 :             : #include "expmed.h"
      32                 :             : #include "optabs-tree.h"
      33                 :             : #include "insn-config.h"
      34                 :             : #include "recog.h"            /* FIXME: for insn_data */
      35                 :             : #include "fold-const.h"
      36                 :             : #include "stor-layout.h"
      37                 :             : #include "tree-eh.h"
      38                 :             : #include "gimplify.h"
      39                 :             : #include "gimple-iterator.h"
      40                 :             : #include "gimple-fold.h"
      41                 :             : #include "gimplify-me.h"
      42                 :             : #include "cfgloop.h"
      43                 :             : #include "tree-vectorizer.h"
      44                 :             : #include "dumpfile.h"
      45                 :             : #include "builtins.h"
      46                 :             : #include "internal-fn.h"
      47                 :             : #include "case-cfn-macros.h"
      48                 :             : #include "fold-const-call.h"
      49                 :             : #include "attribs.h"
      50                 :             : #include "cgraph.h"
      51                 :             : #include "omp-simd-clone.h"
      52                 :             : #include "predict.h"
      53                 :             : #include "tree-vector-builder.h"
      54                 :             : #include "tree-ssa-loop-ivopts.h"
      55                 :             : #include "vec-perm-indices.h"
      56                 :             : #include "gimple-range.h"
      57                 :             : #include "alias.h"
      58                 :             : 
      59                 :             : 
      60                 :             : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      61                 :             :    in the first operand.  Disentangling this is future work, the
      62                 :             :    IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
      63                 :             : 
      64                 :             : 
      65                 :             : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      66                 :             :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.
                         :             :    *MIN_VALUE and *MAX_VALUE are assigned before the range is known to be
                         :             :    useful, so they are overwritten even when the result is false.  */
      67                 :             : 
      68                 :             : bool
      69                 :    10471628 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      70                 :             : {
      71                 :    10471628 :   int_range_max vr;
      72                 :    10471628 :   tree vr_min, vr_max;
      73                 :    20943256 :   get_range_query (cfun)->range_of_expr (vr, var);
      74                 :    10471628 :   if (vr.undefined_p ())  /* Use a varying range of VAR's type instead.  */
      75                 :          27 :     vr.set_varying (TREE_TYPE (var));
      76                 :    10471628 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      77                 :    10471628 :   *min_value = wi::to_wide (vr_min);
      78                 :    10471628 :   *max_value = wi::to_wide (vr_max);
      79                 :    10471628 :   wide_int nonzero = get_nonzero_bits (var);
      80                 :    10471628 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      81                 :    10471628 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      82                 :             :                                          nonzero, sgn) == VR_RANGE)  /* Any other kind is reported as "no range info".  */
      83                 :             :     {
      84                 :     4784649 :       if (dump_enabled_p ())
      85                 :             :         {
      86                 :       69237 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      87                 :       69237 :           dump_printf (MSG_NOTE, " has range [");
      88                 :       69237 :           dump_hex (MSG_NOTE, *min_value);
      89                 :       69237 :           dump_printf (MSG_NOTE, ", ");
      90                 :       69237 :           dump_hex (MSG_NOTE, *max_value);
      91                 :       69237 :           dump_printf (MSG_NOTE, "]\n");
      92                 :             :         }
      93                 :     4784649 :       return true;
      94                 :             :     }
      95                 :             :   else
      96                 :             :     {
      97                 :     5686979 :       if (dump_enabled_p ())
      98                 :             :         {
      99                 :       84237 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
     100                 :       84237 :           dump_printf (MSG_NOTE, " has no range info\n");
     101                 :             :         }
     102                 :     5686979 :       return false;
     103                 :             :     }
     104                 :    10471628 : }
     105                 :             : 
     106                 :             : /* Report that we've found an instance of the pattern called NAME in
     107                 :             :    statement STMT.  Nothing is printed unless dumping is enabled.  */
     108                 :             : 
     109                 :             : static void
     110                 :      932780 : vect_pattern_detected (const char *name, gimple *stmt)
     111                 :             : {
     112                 :      932780 :   if (dump_enabled_p ())
     113                 :       25449 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     114                 :      932780 : }
     115                 :             : 
     116                 :             : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     117                 :             :    return the pattern statement's stmt_vec_info.  Set its vector type to
     118                 :             :    VECTYPE if it doesn't have one already.
                         :             :    The stmt_vec_info is created with VINFO->add_stmt when the lookup finds
                         :             :    none.  PATTERN_STMT is given the basic block of ORIG_STMT_INFO's
                         :             :    statement and inherits its definition type and STMT_VINFO_TYPE.  */
     119                 :             : 
     120                 :             : static stmt_vec_info
     121                 :     1713251 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     122                 :             :                         stmt_vec_info orig_stmt_info, tree vectype)
     123                 :             : {
     124                 :     1713251 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     125                 :     1713251 :   if (pattern_stmt_info == NULL)
     126                 :     1022827 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     127                 :     1713251 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     128                 :             : 
     129                 :     1713251 :   pattern_stmt_info->pattern_stmt_p = true;
     130                 :     1713251 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     131                 :     1713251 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     132                 :     1713251 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     133                 :     1713251 :   STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
     134                 :     1713251 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     135                 :             :     {
     136                 :     1802817 :       gcc_assert (!vectype
     137                 :             :                   || is_a <gcond *> (pattern_stmt)
     138                 :             :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     139                 :             :                       == vect_use_mask_type_p (orig_stmt_info)));  /* Unless PATTERN_STMT is a gcond, a given VECTYPE must be a boolean vector type exactly when vect_use_mask_type_p holds for the original.  */
     140                 :     1031255 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     141                 :     1031255 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;  /* Copied only together with the vector type.  */
     142                 :             :     }
     143                 :     1713251 :   return pattern_stmt_info;
     144                 :             : }
     145                 :             : 
     146                 :             : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     147                 :             :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     148                 :             :    have one already.  ORIG_STMT_INFO is flagged as being in a pattern and
                         :             :    its related statement becomes the stmt_vec_info that
                         :             :    vect_init_pattern_stmt returns for PATTERN_STMT.  */
     149                 :             : 
     150                 :             : static void
     151                 :      733096 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     152                 :             :                        stmt_vec_info orig_stmt_info, tree vectype)
     153                 :             : {
     154                 :      733096 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     155                 :      733096 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     156                 :           0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     157                 :      703476 : }
     158                 :             : 
     159                 :             : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     160                 :             :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     161                 :             :    be different from the vector type of the final pattern statement.
     162                 :             :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     163                 :             :    from which it was derived.  */
     164                 :             : 
     165                 :             : static inline void
     166                 :      938523 : append_pattern_def_seq (vec_info *vinfo,
     167                 :             :                         stmt_vec_info stmt_info, gimple *new_stmt,
     168                 :             :                         tree vectype = NULL_TREE,
     169                 :             :                         tree scalar_type_for_mask = NULL_TREE)
     170                 :             : {
     171                 :     1513318 :   gcc_assert (!scalar_type_for_mask
     172                 :             :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));  /* SCALAR_TYPE_FOR_MASK must be given exactly when VECTYPE is a boolean vector type.  */
     173                 :      938523 :   if (vectype)  /* NEW_STMT is registered with VINFO only in this case.  */
     174                 :             :     {
     175                 :      681996 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     176                 :      681996 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     177                 :      681996 :       if (scalar_type_for_mask)
     178                 :      363728 :         new_stmt_info->mask_precision
     179                 :      727456 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));  /* Mask precision is the bit size of the scalar type's mode.  */
     180                 :             :     }
     181                 :      938523 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     182                 :             :                                       new_stmt);
     183                 :      938523 : }
     184                 :             : 
     185                 :             : 
     186                 :             : /* Add NEW_STMT to VINFO's invariant pattern definition statements.  These
     187                 :             :    statements are not vectorized but are materialized as scalar in the loop
     188                 :             :    preheader.  The statement is appended to VINFO->inv_pattern_def_seq.  */
     189                 :             : 
     190                 :             : static inline void
     191                 :        1338 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
     192                 :             : {
     193                 :        1338 :   gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
     194                 :             : }
     195                 :             : 
     196                 :             : /* The caller wants to perform new operations on vect_external variable
     197                 :             :    VAR, so that the result of the operations would also be vect_external.
     198                 :             :    Return the edge on which the operations can be performed, if one exists.
     199                 :             :    Return null if the operations should instead be treated as part of
     200                 :             :    the pattern that needs them.
                         :             :    An edge is only ever returned when VINFO is a loop_vec_info, in which
                         :             :    case the candidate is the loop preheader edge.  For a VAR that is not a
                         :             :    default definition the candidate is dropped if the defining statement
                         :             :    has no basic block or the dominated_by_p check of the edge destination
                         :             :    against that block fails.  */
     201                 :             : 
     202                 :             : static edge
     203                 :        4765 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     204                 :             : {
     205                 :        4765 :   edge e = NULL;
     206                 :        4765 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     207                 :             :     {
     208                 :         495 :       e = loop_preheader_edge (loop_vinfo->loop);
     209                 :         495 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))  /* Default definitions keep the preheader edge unconditionally.  */
     210                 :             :         {
     211                 :         410 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     212                 :         410 :           if (bb == NULL
     213                 :         410 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     214                 :             :             e = NULL;
     215                 :             :         }
     216                 :             :     }
     217                 :        4765 :   return e;
     218                 :             : }
     219                 :             : 
     220                 :             : /* Return true if the target supports a vector version of CODE,
     221                 :             :    where CODE is known to map to a direct optab with the given SUBTYPE.
     222                 :             :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     223                 :             :    specifies the type of the scalar result.
     224                 :             : 
     225                 :             :    If CODE allows the inputs and outputs to have different type
     226                 :             :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     227                 :             :    than the output mode that determines the appropriate target pattern.
     228                 :             :    Operand 0 of the target pattern then specifies the mode that the output
     229                 :             :    must have.
     230                 :             : 
     231                 :             :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     232                 :             :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     233                 :             :    is nonnull.  Neither output is written when returning false.  */
     234                 :             : 
     235                 :             : static bool
     236                 :        2059 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     237                 :             :                                  tree itype, tree *vecotype_out,
     238                 :             :                                  tree *vecitype_out = NULL,
     239                 :             :                                  enum optab_subtype subtype = optab_default)
     240                 :             : {
     241                 :        2059 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     242                 :        2059 :   if (!vecitype)
     243                 :             :     return false;
     244                 :             : 
     245                 :        2059 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     246                 :        2059 :   if (!vecotype)
     247                 :             :     return false;
     248                 :             : 
     249                 :        1949 :   optab optab = optab_for_tree_code (code, vecitype, subtype);  /* The optab is looked up on the input vector type.  */
     250                 :        1949 :   if (!optab)
     251                 :             :     return false;
     252                 :             : 
     253                 :        1949 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));  /* The handler is keyed on the input vector mode.  */
     254                 :        1949 :   if (icode == CODE_FOR_nothing
     255                 :        1949 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))  /* Operand 0 must have the output vector mode.  */
     256                 :        1723 :     return false;
     257                 :             : 
     258                 :         226 :   *vecotype_out = vecotype;
     259                 :         226 :   if (vecitype_out)
     260                 :         226 :     *vecitype_out = vecitype;
     261                 :             :   return true;
     262                 :             : }
     263                 :             : 
     264                 :             : /* Return true if the target supports a vector version of CODE,
     265                 :             :    where CODE is known to map to a conversion optab with the given SUBTYPE.
     266                 :             :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     267                 :             :    specifies the type of the scalar result.
     268                 :             : 
     269                 :             :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     270                 :             :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     271                 :             :    is nonnull.  Neither output is written when returning false.  */
     272                 :             : 
     273                 :             : static bool
     274                 :         917 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
     275                 :             :                                  tree itype, tree *vecotype_out,
     276                 :             :                                  tree *vecitype_out = NULL,
     277                 :             :                                  enum optab_subtype subtype = optab_default)
     278                 :             : {
     279                 :         917 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     280                 :         917 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     281                 :         917 :   if (!vecitype || !vecotype)  /* Both scalar types need a vector counterpart.  */
     282                 :             :     return false;
     283                 :             : 
     284                 :         817 :   if (!directly_supported_p (code, vecotype, vecitype, subtype))  /* The check takes the output vector type before the input one.  */
     285                 :             :     return false;
     286                 :             : 
     287                 :         425 :   *vecotype_out = vecotype;
     288                 :         425 :   if (vecitype_out)
     289                 :         425 :     *vecitype_out = vecitype;
     290                 :             :   return true;
     291                 :             : }
     292                 :             : 
     293                 :             : /* Round bit precision PRECISION up to a full element, i.e. to
                         :             :    1 << ceil_log2 (PRECISION) but never less than BITS_PER_UNIT.  */
     294                 :             : 
     295                 :             : static unsigned int
     296                 :     2493428 : vect_element_precision (unsigned int precision)
     297                 :             : {
     298                 :           0 :   precision = 1 << ceil_log2 (precision);
     299                 :     3672899 :   return MAX (precision, BITS_PER_UNIT);
     300                 :             : }
     301                 :             : 
     302                 :             : /* If OP is defined by a statement that's being considered for vectorization,
     303                 :             :    return information about that statement, otherwise return NULL.
                         :             :    Only definitions whose STMT_VINFO_DEF_TYPE is vect_internal_def qualify,
                         :             :    and the result is whatever vect_stmt_to_vectorize maps them to.  */
     304                 :             : 
     305                 :             : static stmt_vec_info
     306                 :     1429379 : vect_get_internal_def (vec_info *vinfo, tree op)
     307                 :             : {
     308                 :     1429379 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     309                 :     1429379 :   if (def_stmt_info
     310                 :     1369594 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     311                 :     1360489 :     return vect_stmt_to_vectorize (def_stmt_info);
     312                 :             :   return NULL;
     313                 :             : }
     314                 :             : 
     315                 :             : /* Holds information about an input operand after some sign changes
     316                 :             :    and type promotions have been peeled away.  */
     317                 :             : class vect_unpromoted_value {
     318                 :             : public:
     319                 :             :   vect_unpromoted_value ();  /* Starts with null OP, TYPE and CASTER and DT set to vect_uninitialized_def.  */
     320                 :             : 
     321                 :             :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);  /* Sets OP, DT and CASTER, and refreshes TYPE from OP.  */
     322                 :             : 
     323                 :             :   /* The value obtained after peeling away zero or more casts.  */
     324                 :             :   tree op;
     325                 :             : 
     326                 :             :   /* The type of OP.  */
     327                 :             :   tree type;
     328                 :             : 
     329                 :             :   /* The definition type of OP.  */
     330                 :             :   vect_def_type dt;
     331                 :             : 
     332                 :             :   /* If OP is the result of peeling at least one cast, and if the cast
     333                 :             :      of OP itself is a vectorizable statement, CASTER identifies that
     334                 :             :      statement, otherwise it is null.  */
     335                 :             :   stmt_vec_info caster;
     336                 :             : };
     337                 :             : 
     338                 :   260880313 : inline vect_unpromoted_value::vect_unpromoted_value ()  /* Construct a description that refers to no operand yet.  */
     339                 :   260880313 :   : op (NULL_TREE),
     340                 :   260880313 :     type (NULL_TREE),
     341                 :   260880313 :     dt (vect_uninitialized_def),  /* No definition type has been recorded.  */
     342                 :     2349163 :     caster (NULL)
     343                 :             : {
     344                 :             : }
     345                 :             : 
     346                 :             : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     347                 :             :    statement that casts it to CASTER_IN.  TYPE is reset to the type of
                         :             :    OP_IN.  */
     348                 :             : 
     349                 :             : inline void
     350                 :     9352659 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     351                 :             :                                stmt_vec_info caster_in)
     352                 :             : {
     353                 :     9352659 :   op = op_in;
     354                 :     9352659 :   type = TREE_TYPE (op);  /* Read from the OP just stored.  */
     355                 :     9352659 :   dt = dt_in;
     356                 :     9352659 :   caster = caster_in;
     357                 :     9352659 : }
     358                 :             : 
     359                 :             : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     360                 :             :    to reach some vectorizable inner operand OP', continuing as long as it
     361                 :             :    is possible to convert OP' back to OP using a possible sign change
     362                 :             :    followed by a possible promotion P.  Return this OP', or null if OP is
     363                 :             :    not a vectorizable SSA name.  If there is a promotion P, describe its
     364                 :             :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     365                 :             :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     366                 :             :    have more than one user.
     367                 :             : 
     368                 :             :    A successful return means that it is possible to go from OP' to OP
     369                 :             :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     370                 :             :    whereas the cast from UNPROM to OP might be a promotion, a sign
     371                 :             :    change, or a nop.
     372                 :             : 
     373                 :             :    E.g. say we have:
     374                 :             : 
     375                 :             :        signed short *ptr = ...;
     376                 :             :        signed short C = *ptr;
     377                 :             :        unsigned short B = (unsigned short) C;    // sign change
     378                 :             :        signed int A = (signed int) B;            // unsigned promotion
     379                 :             :        ...possible other uses of A...
     380                 :             :        unsigned int OP = (unsigned int) A;       // sign change
     381                 :             : 
     382                 :             :    In this case it's possible to go directly from C to OP using:
     383                 :             : 
     384                 :             :        OP = (unsigned int) (unsigned short) C;
     385                 :             :             +------------+ +--------------+
     386                 :             :                promotion      sign change
     387                 :             : 
     388                 :             :    so OP' would be C.  The input to the promotion is B, so UNPROM
     389                 :             :    would describe B.  */
     390                 :             : 
     391                 :             : static tree
     392                 :     6979785 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     393                 :             :                                       vect_unpromoted_value *unprom,
     394                 :             :                                       bool *single_use_p = NULL)
     395                 :             : {
     396                 :     6979785 :   tree op_type = TREE_TYPE (op);
     397                 :     6979785 :   if (!INTEGRAL_TYPE_P (op_type))
     398                 :             :     return NULL_TREE;
     399                 :             : 
     400                 :     6955506 :   tree res = NULL_TREE;
     401                 :     6955506 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     402                 :     6955506 :   unsigned int min_precision = orig_precision;
     403                 :     6955506 :   stmt_vec_info caster = NULL;
     404                 :     8368478 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     405                 :             :     {
     406                 :             :       /* See whether OP is simple enough to vectorize.  */
     407                 :     8191775 :       stmt_vec_info def_stmt_info;
     408                 :     8191775 :       gimple *def_stmt;
     409                 :     8191775 :       vect_def_type dt;
     410                 :     8191775 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     411                 :             :         break;
     412                 :             : 
     413                 :             :       /* If OP is the input of a demotion, skip over it to see whether
     414                 :             :          OP is itself the result of a promotion.  If so, the combined
     415                 :             :          effect of the promotion and the demotion might fit the required
     416                 :             :          pattern, otherwise neither operation fits.
     417                 :             : 
     418                 :             :          This copes with cases such as the result of an arithmetic
     419                 :             :          operation being truncated before being stored, and where that
     420                 :             :          arithmetic operation has been recognized as an over-widened one.  */
     421                 :     8186991 :       if (TYPE_PRECISION (op_type) <= min_precision)
     422                 :             :         {
     423                 :             :           /* Use OP as the UNPROM described above if we haven't yet
     424                 :             :              found a promotion, or if using the new input preserves the
     425                 :             :              sign of the previous promotion.  */
     426                 :     8065057 :           if (!res
     427                 :     1205118 :               || TYPE_PRECISION (unprom->type) == orig_precision
     428                 :       38112 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
     429                 :     8099737 :               || (TYPE_UNSIGNED (op_type)
     430                 :       22682 :                   && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
     431                 :             :             {
     432                 :     8031040 :               unprom->set_op (op, dt, caster);
     433                 :     8031040 :               min_precision = TYPE_PRECISION (op_type);
     434                 :             :             }
     435                 :             :           /* Stop if we've already seen a promotion and if this
     436                 :             :              conversion does more than change the sign.  */
     437                 :       34017 :           else if (TYPE_PRECISION (op_type)
     438                 :       34017 :                    != TYPE_PRECISION (unprom->type))
     439                 :             :             break;
     440                 :             : 
     441                 :             :           /* The sequence now extends to OP.  */
     442                 :             :           res = op;
     443                 :             :         }
     444                 :             : 
     445                 :             :       /* See whether OP is defined by a cast.  Record it as CASTER if
     446                 :             :          the cast is potentially vectorizable.  */
     447                 :     8186950 :       if (!def_stmt)
     448                 :             :         break;
     449                 :     8014943 :       caster = def_stmt_info;
     450                 :             : 
     451                 :             :       /* Ignore pattern statements, since we don't link uses for them.  */
     452                 :     8014943 :       if (caster
     453                 :     8014943 :           && single_use_p
     454                 :     1479579 :           && !STMT_VINFO_RELATED_STMT (caster)
     455                 :     9363653 :           && !has_single_use (res))
     456                 :      898439 :         *single_use_p = false;
     457                 :             : 
     458                 :    14793746 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     459                 :     5180137 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     460                 :             :         break;
     461                 :             : 
     462                 :             :       /* Continue with the input to the cast.  */
     463                 :     1412972 :       op = gimple_assign_rhs1 (def_stmt);
     464                 :     1412972 :       op_type = TREE_TYPE (op);
     465                 :             :     }
     466                 :             :   return res;
     467                 :             : }
     468                 :             : 
     469                 :             : /* OP is an integer operand to an operation that returns TYPE, and we
     470                 :             :    want to treat the operation as a widening one.  So far we can treat
     471                 :             :    it as widening from *COMMON_TYPE.
     472                 :             : 
     473                 :             :    Return true if OP is suitable for such a widening operation,
     474                 :             :    either widening from *COMMON_TYPE or from some supertype of it.
     475                 :             :    Update *COMMON_TYPE to the supertype in the latter case.
     476                 :             : 
     477                 :             :    SHIFT_P is true if OP is a shift amount.  */
     478                 :             : 
     479                 :             : static bool
     480                 :      268479 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     481                 :             :                             tree *common_type)
     482                 :             : {
     483                 :             :   /* Calculate the minimum precision required by OP, without changing
     484                 :             :      the sign of either operand.  */
     485                 :      268479 :   unsigned int precision;
     486                 :      268479 :   if (shift_p)
     487                 :             :     {
     488                 :       13004 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     489                 :             :         return false;
     490                 :       10453 :       precision = TREE_INT_CST_LOW (op);
     491                 :             :     }
     492                 :             :   else
     493                 :             :     {
     494                 :      255475 :       precision = wi::min_precision (wi::to_widest (op),
     495                 :      255475 :                                      TYPE_SIGN (*common_type));
     496                 :      255475 :       if (precision * 2 > TYPE_PRECISION (type))
     497                 :             :         return false;
     498                 :             :     }
     499                 :             : 
     500                 :             :   /* If OP requires a wider type, switch to that type.  The checks
     501                 :             :      above ensure that this is still narrower than the result.  */
     502                 :      253268 :   precision = vect_element_precision (precision);
     503                 :      253268 :   if (TYPE_PRECISION (*common_type) < precision)
     504                 :        6146 :     *common_type = build_nonstandard_integer_type
     505                 :        6146 :       (precision, TYPE_UNSIGNED (*common_type));
     506                 :             :   return true;
     507                 :             : }
     508                 :             : 
     509                 :             : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     510                 :             :    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
     511                 :             : 
     512                 :             : static bool
     513                 :       38898 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     514                 :             : {
     515                 :       38898 :   if (types_compatible_p (*common_type, new_type))
     516                 :             :     return true;
     517                 :             : 
     518                 :             :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     519                 :        6911 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     520                 :        6911 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     521                 :             :     return true;
     522                 :             : 
     523                 :             :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     524                 :        6383 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     525                 :        6383 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     526                 :             :     {
     527                 :         347 :       *common_type = new_type;
     528                 :         347 :       return true;
     529                 :             :     }
     530                 :             : 
     531                 :             :   /* We have mismatched signs, with the signed type being
     532                 :             :      no wider than the unsigned type.  In this case we need
     533                 :             :      a wider signed type.  */
     534                 :        6036 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     535                 :             :                                 TYPE_PRECISION (new_type));
     536                 :        6036 :   precision *= 2;
     537                 :             : 
     538                 :        6036 :   if (precision * 2 > TYPE_PRECISION (type))
     539                 :             :     return false;
     540                 :             : 
     541                 :          16 :   *common_type = build_nonstandard_integer_type (precision, false);
     542                 :          16 :   return true;
     543                 :             : }
     544                 :             : 
     545                 :             : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     546                 :             :    in which each node either performs CODE or WIDENED_CODE, and where
     547                 :             :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     548                 :             :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     549                 :             :    CODE and WIDENED_CODE are some sort of shift.
     550                 :             : 
     551                 :             :    If STMT_INFO is such a tree, return the number of leaf operands
     552                 :             :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     553                 :             :    to a type that (a) is narrower than the result of STMT_INFO and
     554                 :             :    (b) can hold all leaf operand values.
     555                 :             : 
     556                 :             :    If SUBTYPE is nonnull, allow the operands to differ in sign
     557                 :             :    but not in precision.  *SUBTYPE is set to optab_vector_mixed_sign
     558                 :             :    when that happens.
     559                 :             : 
     560                 :             :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     561                 :             :    exists.  */
     562                 :             : 
     563                 :             : static unsigned int
     564                 :   110225839 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     565                 :             :                       code_helper widened_code, bool shift_p,
     566                 :             :                       unsigned int max_nops,
     567                 :             :                       vect_unpromoted_value *unprom, tree *common_type,
     568                 :             :                       enum optab_subtype *subtype = NULL)
     569                 :             : {
     570                 :             :   /* Check for an integer operation with the right code.  */
     571                 :   110225839 :   gimple* stmt = stmt_info->stmt;
     572                 :   110225839 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     573                 :             :     return 0;
     574                 :             : 
     575                 :    89044733 :   code_helper rhs_code;
     576                 :    89044733 :   if (is_gimple_assign (stmt))
     577                 :    75893260 :     rhs_code = gimple_assign_rhs_code (stmt);
     578                 :    13151473 :   else if (is_gimple_call (stmt))
     579                 :    13151473 :     rhs_code = gimple_call_combined_fn (stmt);
     580                 :             :   else
     581                 :             :     return 0;
     582                 :             : 
     583                 :    89044733 :   if (rhs_code != code
     584                 :    89044733 :       && rhs_code != widened_code)
     585                 :             :     return 0;
     586                 :             : 
     587                 :     5469382 :   tree lhs = gimple_get_lhs (stmt);
     588                 :     5469382 :   tree type = TREE_TYPE (lhs);
     589                 :     5469382 :   if (!INTEGRAL_TYPE_P (type))
     590                 :             :     return 0;
     591                 :             : 
     592                 :             :   /* Assume that both operands will be leaf operands.  */
     593                 :     4926732 :   max_nops -= 2;
     594                 :             : 
     595                 :             :   /* Check the operands.  */
     596                 :     4926732 :   unsigned int next_op = 0;
     597                 :     5596850 :   for (unsigned int i = 0; i < 2; ++i)
     598                 :             :     {
     599                 :     5310338 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     600                 :     5310338 :       unsigned int nops = 1;
     601                 :     5310338 :       tree op = gimple_arg (stmt, i);
     602                 :     5310338 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     603                 :             :         {
     604                 :             :           /* We already have a common type from earlier operands.
     605                 :             :              Update it to account for OP.  */
     606                 :      268479 :           this_unprom->set_op (op, vect_constant_def);
     607                 :      268479 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     608                 :             :             return 0;
     609                 :             :         }
     610                 :             :       else
     611                 :             :         {
     612                 :             :           /* Only allow shifts by constants.  */
     613                 :     5041859 :           if (shift_p && i == 1)
     614                 :             :             return 0;
     615                 :             : 
     616                 :     5034400 :           if (rhs_code != code)
     617                 :             :             {
     618                 :             :               /* If rhs_code is widened_code, don't look through further
     619                 :             :                  possible promotions, there is a promotion already embedded
     620                 :             :                  in the WIDEN_*_EXPR.  */
     621                 :        1967 :               if (TREE_CODE (op) != SSA_NAME
     622                 :        1967 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     623                 :           0 :                 return 0;
     624                 :             : 
     625                 :        1967 :               stmt_vec_info def_stmt_info;
     626                 :        1967 :               gimple *def_stmt;
     627                 :        1967 :               vect_def_type dt;
     628                 :        1967 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     629                 :             :                                        &def_stmt))
     630                 :             :                 return 0;
     631                 :        1967 :               this_unprom->set_op (op, dt, NULL);
     632                 :             :             }
     633                 :     5032433 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     634                 :             :                                                           this_unprom))
     635                 :             :             return 0;
     636                 :             : 
     637                 :     4940546 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     638                 :             :             {
     639                 :             :               /* The operand isn't widened.  If STMT_INFO has the code
     640                 :             :                  for an unwidened operation, recursively check whether
     641                 :             :                  this operand is a node of the tree.  */
     642                 :     4513830 :               if (rhs_code != code
     643                 :     4513830 :                   || max_nops == 0
     644                 :     4514256 :                   || this_unprom->dt != vect_internal_def)
     645                 :             :                 return 0;
     646                 :             : 
     647                 :             :               /* Give back the leaf slot allocated above now that we're
     648                 :             :                  not treating this as a leaf operand.  */
     649                 :         426 :               max_nops += 1;
     650                 :             : 
     651                 :             :               /* Recursively process the definition of the operand.  */
     652                 :         426 :               stmt_vec_info def_stmt_info
     653                 :         426 :                 = vect_get_internal_def (vinfo, this_unprom->op);
     654                 :             : 
     655                 :         426 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     656                 :             :                                            widened_code, shift_p, max_nops,
     657                 :             :                                            this_unprom, common_type,
     658                 :             :                                            subtype);
     659                 :         426 :               if (nops == 0)
     660                 :             :                 return 0;
     661                 :             : 
     662                 :         287 :               max_nops -= nops;
     663                 :             :             }
     664                 :             :           else
     665                 :             :             {
     666                 :             :               /* Make sure that the operand is narrower than the result.  */
     667                 :      426716 :               if (TYPE_PRECISION (this_unprom->type) * 2
     668                 :      426716 :                   > TYPE_PRECISION (type))
     669                 :             :                 return 0;
     670                 :             : 
     671                 :             :               /* Update COMMON_TYPE for the new operand.  */
     672                 :      422373 :               if (i == 0)
     673                 :      383475 :                 *common_type = this_unprom->type;
     674                 :       38898 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     675                 :             :                                                  common_type))
     676                 :             :                 {
     677                 :        6020 :                   if (subtype)
     678                 :             :                     {
     679                 :             :                       /* See if we can sign extend the smaller type.  */
     680                 :         210 :                       if (TYPE_PRECISION (this_unprom->type)
     681                 :         210 :                           > TYPE_PRECISION (*common_type))
     682                 :          36 :                         *common_type = this_unprom->type;
     683                 :         210 :                       *subtype = optab_vector_mixed_sign;
     684                 :             :                     }
     685                 :             :                   else
     686                 :             :                     return 0;
     687                 :             :                 }
     688                 :             :             }
     689                 :             :         }
     690                 :      670118 :       next_op += nops;
     691                 :             :     }
     692                 :             :   return next_op;
     693                 :             : }
     694                 :             : 
     695                 :             : /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
     696                 :             :    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
     697                 :             : 
     698                 :             : static tree
     699                 :     1469446 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     700                 :             : {
     701                 :           0 :   return make_temp_ssa_name (type, stmt, "patt");
     702                 :             : }
     703                 :             : 
     704                 :             : /* STMT2_INFO describes a type conversion that could be split into STMT1
     705                 :             :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     706                 :             :    input.  Try to do this using pattern statements, returning true on
     707                 :             :    success.  */
     708                 :             : 
     709                 :             : static bool
     710                 :       30699 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     711                 :             :                       gimple *stmt1, tree vectype)
     712                 :             : {
     713                 :       30699 :   if (is_pattern_stmt_p (stmt2_info))
     714                 :             :     {
     715                 :             :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     716                 :             :          the pattern is attached.  */
     717                 :        1079 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     718                 :        1079 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     719                 :             : 
     720                 :        1079 :       if (dump_enabled_p ())
     721                 :          22 :         dump_printf_loc (MSG_NOTE, vect_location,
     722                 :             :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     723                 :             : 
     724                 :             :       /* Since STMT2_INFO is a pattern statement, we can change it
     725                 :             :          in-situ without worrying about changing the code for the
     726                 :             :          containing block.  */
     727                 :        1079 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     728                 :             : 
     729                 :        1079 :       if (dump_enabled_p ())
     730                 :             :         {
     731                 :          22 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     732                 :          22 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     733                 :             :                            stmt2_info->stmt);
     734                 :             :         }
     735                 :             : 
     736                 :        1079 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     737                 :        1079 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     738                 :             :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     739                 :             :            to the end of the definition sequence.  */
     740                 :        1079 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     741                 :             :       else
     742                 :             :         {
     743                 :             :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     744                 :             :              before it.  */
     745                 :           0 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     746                 :           0 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     747                 :             :         }
     748                 :        1079 :       return true;
     749                 :             :     }
     750                 :             :   else
     751                 :             :     {
     752                 :             :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     753                 :             :          two-statement pattern now.  */
     754                 :       29620 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
     755                 :       29620 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     756                 :       29620 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     757                 :       29620 :       if (!lhs_vectype)
     758                 :             :         return false;
     759                 :             : 
     760                 :       29620 :       if (dump_enabled_p ())
     761                 :        2158 :         dump_printf_loc (MSG_NOTE, vect_location,
     762                 :             :                          "Splitting statement: %G", stmt2_info->stmt);
     763                 :             : 
     764                 :             :       /* Add STMT1 as a singleton pattern definition sequence.  */
     765                 :       29620 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     766                 :       29620 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     767                 :       29620 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     768                 :             : 
     769                 :             :       /* Build the second of the two pattern statements.  */
     770                 :       29620 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     771                 :       29620 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     772                 :       29620 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     773                 :             : 
     774                 :       29620 :       if (dump_enabled_p ())
     775                 :             :         {
     776                 :        2158 :           dump_printf_loc (MSG_NOTE, vect_location,
     777                 :             :                            "into pattern statements: %G", stmt1);
     778                 :        2158 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     779                 :             :                            (gimple *) new_stmt2);
     780                 :             :         }
     781                 :             : 
     782                 :       29620 :       return true;
     783                 :             :     }
     784                 :             : }
     785                 :             : 
     786                 :             : /* Look for the following pattern
     787                 :             :         X = x[i]
     788                 :             :         Y = y[i]
     789                 :             :         DIFF = X - Y
     790                 :             :         DAD = ABS_EXPR<DIFF>
     791                 :             : 
     792                 :             :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     793                 :             :    HALF_TYPE and UNPROM will be set should the statement be found to
     794                 :             :    be a widened operation.
     795                 :             :    DIFF_STMT will be set to the MINUS_EXPR
     796                 :             :    statement that precedes the ABS_STMT if it is a MINUS_EXPR.
     797                 :             :  */
     798                 :             : static bool
     799                 :    18718877 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     800                 :             :                                 tree *half_type,
     801                 :             :                                 vect_unpromoted_value unprom[2],
     802                 :             :                                 gassign **diff_stmt)
     803                 :             : {
     804                 :    18718877 :   if (!abs_stmt)
     805                 :             :     return false;
     806                 :             : 
     807                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     808                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     809                 :    18718877 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     810                 :    18718877 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     811                 :             :     return false;
     812                 :             : 
     813                 :       21922 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     814                 :       21922 :   tree abs_type = TREE_TYPE (abs_oprnd);
     815                 :       21922 :   if (!abs_oprnd)
     816                 :             :     return false;
     817                 :       16069 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     818                 :        6066 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     819                 :       27863 :       || TYPE_UNSIGNED (abs_type))
     820                 :             :     return false;
     821                 :             : 
     822                 :             :   /* Peel off conversions from the ABS input.  This can involve sign
     823                 :             :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     824                 :             :      or signed promotion, but it can't include unsigned promotion.
     825                 :             :      (Note that ABS of an unsigned promotion should have been folded
     826                 :             :      away before now anyway.)  */
     827                 :        5941 :   vect_unpromoted_value unprom_diff;
     828                 :        5941 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     829                 :             :                                                     &unprom_diff);
     830                 :        5941 :   if (!abs_oprnd)
     831                 :             :     return false;
     832                 :        5728 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     833                 :        5728 :       && TYPE_UNSIGNED (unprom_diff.type))
     834                 :             :     return false;
     835                 :             : 
     836                 :             :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     837                 :        5728 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     838                 :        5728 :   if (!diff_stmt_vinfo)
     839                 :             :     return false;
     840                 :             : 
     841                 :        5000 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     842                 :        5000 :   if (diff_stmt && diff
     843                 :        4077 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     844                 :        7044 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     845                 :        1052 :     *diff_stmt = diff;
     846                 :             : 
     847                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     848                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     849                 :        5000 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     850                 :             :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     851                 :             :                             false, 2, unprom, half_type))
     852                 :             :     return true;
     853                 :             : 
     854                 :             :   return false;
     855                 :             : }
     856                 :             : 
     857                 :             : /* Convert UNPROM to TYPE and return the result, adding new statements
     858                 :             :    to STMT_INFO's pattern definition statements if no better way is
     859                 :             :    available.  VECTYPE is the vector form of TYPE.
     860                 :             : 
     861                 :             :    If SUBTYPE is optab_vector_mixed_sign, TYPE must be signed, and both
                         :             :    TYPE and VECTYPE are replaced by their unsigned forms when UNPROM->op
                         :             :    is unsigned.
                         :             : 
                         :             :    The options are tried in this order: UNPROM->op itself if its type is
                         :             :    already compatible with TYPE; a new constant of type TYPE if UNPROM->op
                         :             :    is an INTEGER_CST; the result of UNPROM->caster, or a mid-way value
                         :             :    obtained by splitting that cast, if its type is compatible with TYPE;
                         :             :    and otherwise a new NOP_EXPR conversion.  The new conversion is
                         :             :    inserted on an incoming edge when its input is still UNPROM->op,
                         :             :    UNPROM->dt is vect_external_def and vect_get_external_def_edge finds
                         :             :    an edge; failing that, it is added to STMT_INFO's pattern definition
                         :             :    statements with vector type VECTYPE.  */
     862                 :             : 
     863                 :             : static tree
     864                 :      407963 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     865                 :             :                     vect_unpromoted_value *unprom, tree vectype,
     866                 :             :                     enum optab_subtype subtype = optab_default)
     867                 :             : {
     868                 :             :   /* For a mixed-sign operation TYPE must be signed; switch TYPE and
                         :             :      VECTYPE to their unsigned forms if this input is unsigned.  */
     869                 :      407963 :   if (subtype == optab_vector_mixed_sign)
     870                 :             :     {
     871                 :         164 :       gcc_assert (!TYPE_UNSIGNED (type));
     872                 :         164 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     873                 :             :         {
     874                 :          82 :           type = unsigned_type_for (type);
     875                 :          82 :           vectype = unsigned_type_for (vectype);
     876                 :             :         }
     877                 :             :     }
     878                 :             : 
     879                 :             :   /* Check for a no-op conversion.  */
     880                 :      407963 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     881                 :      142440 :     return unprom->op;
     882                 :             : 
     883                 :             :   /* Allow the caller to create constant vect_unpromoted_values.  */
     884                 :      265523 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     885                 :      163770 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     886                 :             : 
     887                 :      101753 :   tree input = unprom->op;
     888                 :      101753 :   if (unprom->caster)
     889                 :             :     {
     890                 :       52062 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     891                 :       52062 :       tree lhs_type = TREE_TYPE (lhs);
     892                 :             : 
     893                 :             :       /* If the result of the existing cast is the right width, use it
     894                 :             :          instead of the source of the cast.  */
     895                 :       52062 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     896                 :             :         input = lhs;
     897                 :             :       /* If the precision we want is between the source and result
     898                 :             :          precisions of the existing cast, try splitting the cast into
     899                 :             :          two and tapping into a mid-way point.  */
     900                 :       50329 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     901                 :       50329 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     902                 :             :         {
     903                 :             :           /* In order to preserve the semantics of the original cast,
     904                 :             :              give the mid-way point the same signedness as the input value.
     905                 :             : 
     906                 :             :              It would be possible to use a signed type here instead if
     907                 :             :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     908                 :             :              make the sign of the midtype sensitive to the order in
     909                 :             :              which we process the statements, since the signedness of
     910                 :             :              TYPE is the signedness required by just one of possibly
     911                 :             :              many users.  Also, unsigned promotions are usually as cheap
     912                 :             :              as or cheaper than signed ones, so it's better to keep an
     913                 :             :              unsigned promotion.  */
     914                 :       30699 :           tree midtype = build_nonstandard_integer_type
     915                 :       30699 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     916                 :       30699 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     917                 :       30699 :           if (vec_midtype)
     918                 :             :             {
     919                 :       30699 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     920                 :       30699 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     921                 :             :                                                        unprom->op);
     922                 :       30699 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     923                 :             :                                          vec_midtype))
     924                 :           0 :                 append_pattern_def_seq (vinfo, stmt_info,
     925                 :             :                                         new_stmt, vec_midtype);
     926                 :             :             }
     927                 :             :         }
     928                 :             : 
     929                 :             :       /* See if we can reuse an existing result.  */
     930                 :       52062 :       if (types_compatible_p (type, TREE_TYPE (input)))
     931                 :             :         return input;
     932                 :             :     }
     933                 :             : 
     934                 :             :   /* We need a new conversion statement.  */
     935                 :       78663 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     936                 :       78663 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     937                 :             : 
     938                 :             :   /* If INPUT is still UNPROM->op and that is an external value, see if
     939                 :             :      we can insert the new statement on an incoming edge.  The insertion
                         :             :      is asserted not to have created a new basic block.  */
     940                 :       78663 :   if (input == unprom->op && unprom->dt == vect_external_def)
     941                 :        4752 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     942                 :             :       {
     943                 :         482 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     944                 :         482 :         gcc_assert (!new_bb);
     945                 :             :         return new_op;
     946                 :             :       }
     947                 :             : 
     948                 :             :   /* As a (common) last resort, add the statement to the pattern itself.  */
     949                 :       78181 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     950                 :       78181 :   return new_op;
     951                 :             : }
     952                 :             : 
     953                 :             : /* Invoke vect_convert_input for N elements of UNPROM and store the
     954                 :             :    result in the corresponding elements of RESULT.
     955                 :             : 
     956                 :             :    STMT_INFO, TYPE, VECTYPE and SUBTYPE are passed through unchanged to
                         :             :    each vect_convert_input call.  If an element of UNPROM has the same op
                         :             :    as an earlier element, the earlier element's RESULT entry is reused
                         :             :    instead of converting the value a second time.  */
     957                 :             : 
     958                 :             : static void
     959                 :      206767 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     960                 :             :                      tree *result, tree type, vect_unpromoted_value *unprom,
     961                 :             :                      tree vectype, enum optab_subtype subtype = optab_default)
     962                 :             : {
     963                 :      614471 :   for (unsigned int i = 0; i < n; ++i)
     964                 :             :     {
     965                 :             :       unsigned int j;
     966                 :      608400 :       for (j = 0; j < i; ++j)
     967                 :      200937 :         if (unprom[j].op == unprom[i].op)
     968                 :             :           break;
     969                 :             : 
     970                 :      407704 :       if (j < i)
     971                 :         241 :         result[i] = result[j];
     972                 :             :       else
     973                 :      407463 :         result[i] = vect_convert_input (vinfo, stmt_info,
     974                 :      407463 :                                         type, &unprom[i], vectype, subtype);
     975                 :             :     }
     976                 :      206767 : }
     977                 :             : 
     978                 :             : /* The caller has created a (possibly empty) sequence of pattern definition
     979                 :             :    statements followed by a single statement PATTERN_STMT.  Cast the result
     980                 :             :    of this final statement to TYPE.  If a new statement is needed, add
     981                 :             :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     982                 :             :    and return the new statement, otherwise return PATTERN_STMT as-is.
     983                 :             :    VECITYPE is the vector form of PATTERN_STMT's result type.
                         :             : 
                         :             :    A new statement is needed only if TYPE is not compatible with the type
                         :             :    of PATTERN_STMT's lhs.  The new statement is a NOP_EXPR assignment of
                         :             :    that lhs to a variable of type TYPE created by vect_recog_temp_ssa_var;
                         :             :    it is returned without being added to the pattern definition
                         :             :    statements itself.  */
     984                 :             : 
     985                 :             : static gimple *
     986                 :      230510 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     987                 :             :                      gimple *pattern_stmt, tree vecitype)
     988                 :             : {
     989                 :      230510 :   tree lhs = gimple_get_lhs (pattern_stmt);
     990                 :      230510 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
     991                 :             :     {
     992                 :      206247 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
     993                 :      206247 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
     994                 :      206247 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
     995                 :             :     }
     996                 :      230510 :   return pattern_stmt;
     997                 :             : }
     998                 :             : 
     999                 :             : /* Return true if STMT_INFO describes a reduction for which reassociation
    1000                 :             :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1001                 :             :    a reduction chain and optimistically assume that all statements
    1002                 :             :    except the last allow reassociation.
    1003                 :             :    Also require it to have code CODE and to be a reduction
    1004                 :             :    in the outermost loop.  When returning true, store the operands in
    1005                 :             :    *OP0_OUT and *OP1_OUT; the two operands are swapped if CODE is
                         :             :    commutative and STMT_VINFO_REDUC_IDX (STMT_INFO) is 0.
                         :             : 
                         :             :    Always return false if VINFO is not a loop_vec_info or if STMT_INFO's
                         :             :    statement is not a gassign.  *OP0_OUT and *OP1_OUT are left untouched
                         :             :    when returning false.  */
    1006                 :             : 
    1007                 :             : static bool
    1008                 :    82332340 : vect_reassociating_reduction_p (vec_info *vinfo,
    1009                 :             :                                 stmt_vec_info stmt_info, tree_code code,
    1010                 :             :                                 tree *op0_out, tree *op1_out)
    1011                 :             : {
    1012                 :    82332340 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1013                 :     9437101 :   if (!loop_info)
    1014                 :             :     return false;
    1015                 :             : 
    1016                 :     9437101 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1017                 :    10282204 :   if (!assign || gimple_assign_rhs_code (assign) != code)
    1018                 :             :     return false;
    1019                 :             : 
    1020                 :             :   /* We don't allow changing the order of the computation in the inner-loop
    1021                 :             :      when doing outer-loop vectorization.  */
    1022                 :     1994614 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1023                 :    84229372 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1024                 :             :     return false;
    1025                 :             : 
    1026                 :     1947133 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    1027                 :             :     {
    1028                 :       99117 :       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1029                 :             :                                        code))
    1030                 :             :         return false;
    1031                 :             :     }
    1032                 :     1848016 :   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    1033                 :             :     return false;
    1034                 :             : 
    1035                 :       97582 :   *op0_out = gimple_assign_rhs1 (assign);
    1036                 :       97582 :   *op1_out = gimple_assign_rhs2 (assign);
    1037                 :       97582 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1038                 :       29276 :     std::swap (*op0_out, *op1_out);
    1039                 :             :   return true;
    1040                 :             : }
    1041                 :             : 
    1042                 :             : /* match.pd function to match
    1043                 :             :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1044                 :             :    with conditions:
    1045                 :             :    1) @1, @2, c, d, a, b are all integral type.
    1046                 :             :    2) There's single_use for both @1 and @2.
    1047                 :             :    3) a, c have same precision.
    1048                 :             :    4) c and @1 have different precision.
    1049                 :             :    5) c, d are the same type or they can differ in sign when convert is
    1050                 :             :    truncation.
    1051                 :             : 
    1052                 :             :    record a and c and d and @3.  */
    1053                 :             : 
    1054                 :             : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1055                 :             : 
    1056                 :             : /* Function vect_recog_cond_expr_convert_pattern
    1057                 :             : 
    1058                 :             :    Try to find the following pattern:
    1059                 :             : 
    1060                 :             :    TYPE_AB A,B;
    1061                 :             :    TYPE_CD C,D;
    1062                 :             :    TYPE_E E;
    1063                 :             :    TYPE_E op_true = (TYPE_E) A;
    1064                 :             :    TYPE_E op_false = (TYPE_E) B;
    1065                 :             : 
    1066                 :             :    E = C cmp D ? op_true : op_false;
    1067                 :             : 
    1068                 :             :    where
    1069                 :             :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1070                 :             :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1071                 :             :    single_use of op_true and op_false.
    1072                 :             :    The types of A and B may differ in sign when (TYPE_E) A is a truncation.
    1073                 :             : 
    1074                 :             :    Input:
    1075                 :             : 
    1076                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1077                 :             :    Here it starts with E = C cmp D ? op_true : op_false;
    1078                 :             : 
    1079                 :             :    Output:
    1080                 :             : 
    1081                 :             :    TYPE_AB E' = C cmp D ? A : B;
    1082                 :             :    TYPE_E E = (TYPE_E) E';
    1083                 :             : 
    1084                 :             :    If A and B differ in sign, B is first converted to the type of A by an
                         :             :    extra NOP_EXPR statement, which is also recorded in the pattern
                         :             :    definition statements.
    1085                 :             : 
    1086                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1087                 :             : 
    1088                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1089                 :             :    stmts that constitute the pattern.  In this case it will be:
    1090                 :             :    E = (TYPE_E) E';
    1091                 :             :    E' = C cmp D ? A : B; is recorded in pattern definition statements.
                         :             :    NULL is returned if STMT_VINFO's statement is not a gassign or if
                         :             :    gimple_cond_expr_convert_p does not match its lhs.  */
    1092                 :             : 
    1093                 :             : static gimple *
    1094                 :    27504562 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1095                 :             :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1096                 :             : {
    1097                 :    46303990 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1098                 :    18799486 :   tree lhs, match[4], temp, type, new_lhs, op2;
    1099                 :    18799486 :   gimple *cond_stmt;
    1100                 :    18799486 :   gimple *pattern_stmt;
    1101                 :             : 
    1102                 :    18799486 :   if (!last_stmt)
    1103                 :             :     return NULL;
    1104                 :             : 
    1105                 :    18799486 :   lhs = gimple_assign_lhs (last_stmt);
    1106                 :             : 
    1107                 :             :   /* Find E = C cmp D ? (TYPE_E) A : (TYPE_E) B;
    1108                 :             :      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
    1109                 :    18799486 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1110                 :             :     return NULL;
    1111                 :             : 
    1112                 :          58 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1113                 :             : 
    1114                 :          58 :   op2 = match[2];
    1115                 :          58 :   type = TREE_TYPE (match[1]);
    1116                 :          58 :   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    1117                 :             :     {
    1118                 :          24 :       op2 = vect_recog_temp_ssa_var (type, NULL);
    1119                 :          24 :       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1120                 :          24 :       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
    1121                 :             :                               get_vectype_for_scalar_type (vinfo, type));
    1122                 :             :     }
    1123                 :             : 
    1124                 :          58 :   temp = vect_recog_temp_ssa_var (type, NULL);
    1125                 :          58 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1126                 :             :                                                  match[1], op2));
    1127                 :          58 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
    1128                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    1129                 :          58 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1130                 :          58 :   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
    1131                 :          58 :   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
    1132                 :             : 
    1133                 :          58 :   if (dump_enabled_p ())
    1134                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1135                 :             :                      "created pattern stmt: %G", pattern_stmt);
    1136                 :             :   return pattern_stmt;
    1137                 :             : }
    1138                 :             : 
    1139                 :             : /* Function vect_recog_dot_prod_pattern
    1140                 :             : 
    1141                 :             :    Try to find the following pattern:
    1142                 :             : 
    1143                 :             :      type1a x_t;
    1144                 :             :      type1b y_t;
    1145                 :             :      TYPE1 prod;
    1146                 :             :      TYPE2 sum = init;
    1147                 :             :    loop:
    1148                 :             :      sum_0 = phi <init, sum_1>
    1149                 :             :      S1  x_t = ...
    1150                 :             :      S2  y_t = ...
    1151                 :             :      S3  x_T = (TYPE1) x_t;
    1152                 :             :      S4  y_T = (TYPE1) y_t;
    1153                 :             :      S5  prod = x_T * y_T;
    1154                 :             :      [S6  prod = (TYPE2) prod;  #optional]
    1155                 :             :      S7  sum_1 = prod + sum_0;
    1156                 :             : 
    1157                 :             :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1158                 :             :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1159                 :             :    'type1a' and 'type1b' can differ.
    1160                 :             : 
    1161                 :             :    Input:
    1162                 :             : 
    1163                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1164                 :             :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1165                 :             :    will be detected.
    1166                 :             : 
    1167                 :             :    Output:
    1168                 :             : 
    1169                 :             :    * TYPE_OUT: The type of the output  of this pattern.
    1170                 :             : 
    1171                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1172                 :             :    stmts that constitute the pattern. In this case it will be:
    1173                 :             :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1174                 :             : 
    1175                 :             :    Note: The dot-prod idiom is a widening reduction pattern that is
    1176                 :             :          vectorized without preserving all the intermediate results. It
    1177                 :             :          produces only N/2 (widened) results (by summing up pairs of
    1178                 :             :          intermediate results) rather than all N results.  Therefore, we
    1179                 :             :          cannot allow this pattern when we want to get all the results and in
    1180                 :             :          the correct order (as is the case when this computation is in an
    1181                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    1182                 :             : 
    1183                 :             : static gimple *
    1184                 :    27444468 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1185                 :             :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1186                 :             : {
    1187                 :    27444468 :   tree oprnd0, oprnd1;
    1188                 :    27444468 :   gimple *last_stmt = stmt_vinfo->stmt;
    1189                 :    27444468 :   tree type, half_type;
    1190                 :    27444468 :   gimple *pattern_stmt;
    1191                 :    27444468 :   tree var;
    1192                 :             : 
    1193                 :             :   /* Look for the following pattern
    1194                 :             :           DX = (TYPE1) X;
    1195                 :             :           DY = (TYPE1) Y;
    1196                 :             :           DPROD = DX * DY;
    1197                 :             :           DDPROD = (TYPE2) DPROD;
    1198                 :             :           sum_1 = DDPROD + sum_0;
    1199                 :             :      In which
    1200                 :             :      - DX is double the size of X
    1201                 :             :      - DY is double the size of Y
    1202                 :             :      - DX, DY, DPROD all have the same type but the sign
    1203                 :             :        between X, Y and DPROD can differ.
    1204                 :             :      - sum is the same size of DPROD or bigger
    1205                 :             :      - sum has been recognized as a reduction variable.
    1206                 :             : 
    1207                 :             :      This is equivalent to:
    1208                 :             :        DPROD = X w* Y;          #widen mult
    1209                 :             :        sum_1 = DPROD w+ sum_0;  #widen summation
    1210                 :             :      or
    1211                 :             :        DPROD = X w* Y;          #widen mult
    1212                 :             :        sum_1 = DPROD + sum_0;   #summation
    1213                 :             :    */
    1214                 :             : 
    1215                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1216                 :             :      of the above pattern.  */
    1217                 :             : 
    1218                 :    27444468 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1219                 :             :                                        &oprnd0, &oprnd1))
    1220                 :             :     return NULL;
    1221                 :             : 
    1222                 :       32886 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1223                 :             : 
    1224                 :       32886 :   vect_unpromoted_value unprom_mult;
    1225                 :       32886 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1226                 :             : 
    1227                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1228                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1229                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1230                 :             :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1231                 :       32886 :   if (!oprnd0)
    1232                 :             :     return NULL;
    1233                 :             : 
    1234                 :       24678 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1235                 :       24678 :   if (!mult_vinfo)
    1236                 :             :     return NULL;
    1237                 :             : 
    1238                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1239                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1240                 :       71907 :   vect_unpromoted_value unprom0[2];
    1241                 :       23969 :   enum optab_subtype subtype = optab_vector;
    1242                 :       23969 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1243                 :             :                              false, 2, unprom0, &half_type, &subtype))
    1244                 :             :     return NULL;
    1245                 :             : 
    1246                 :             :   /* If there are two widening operations, make sure they agree on the sign
    1247                 :             :      of the extension.  The result of an optab_vector_mixed_sign operation
    1248                 :             :      is signed; otherwise, the result has the same sign as the operands.  */
    1249                 :         951 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1250                 :        1567 :       && (subtype == optab_vector_mixed_sign
    1251                 :         616 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1252                 :         428 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1253                 :             :     return NULL;
    1254                 :             : 
    1255                 :         843 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1256                 :             : 
    1257                 :             :   /* If the inputs have mixed signs, canonicalize on using the signed
    1258                 :             :      input type for analysis.  This also helps when emulating mixed-sign
    1259                 :             :      operations using signed operations.  */
    1260                 :         843 :   if (subtype == optab_vector_mixed_sign)
    1261                 :         150 :     half_type = signed_type_for (half_type);
    1262                 :             : 
    1263                 :         843 :   tree half_vectype;
    1264                 :         843 :   if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1265                 :             :                                         type_out, &half_vectype, subtype))
    1266                 :             :     {
    1267                 :             :       /* We can emulate a mixed-sign dot-product using a sequence of
    1268                 :             :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1269                 :         424 :       if (subtype != optab_vector_mixed_sign
    1270                 :         424 :           || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
    1271                 :             :                                                DOT_PROD_EXPR, half_type,
    1272                 :             :                                                type_out, &half_vectype,
    1273                 :             :                                                optab_vector))
    1274                 :         418 :         return NULL;
    1275                 :             : 
    1276                 :           6 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1277                 :             :                                                *type_out);
    1278                 :             :     }
    1279                 :             : 
    1280                 :             :   /* Get the inputs in the appropriate types.  */
    1281                 :         425 :   tree mult_oprnd[2];
    1282                 :         425 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1283                 :             :                        unprom0, half_vectype, subtype);
    1284                 :             : 
    1285                 :         425 :   var = vect_recog_temp_ssa_var (type, NULL);
    1286                 :         425 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1287                 :             :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1288                 :             : 
    1289                 :         425 :   return pattern_stmt;
    1290                 :             : }
    1291                 :             : 
    1292                 :             : 
    1293                 :             : /* Function vect_recog_sad_pattern
    1294                 :             : 
    1295                 :             :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1296                 :             : 
    1297                 :             :      type x_t, y_t;
    1298                 :             :      signed TYPE1 diff, abs_diff;
    1299                 :             :      TYPE2 sum = init;
    1300                 :             :    loop:
    1301                 :             :      sum_0 = phi <init, sum_1>
    1302                 :             :      S1  x_t = ...
    1303                 :             :      S2  y_t = ...
    1304                 :             :      S3  x_T = (TYPE1) x_t;
    1305                 :             :      S4  y_T = (TYPE1) y_t;
    1306                 :             :      S5  diff = x_T - y_T;
    1307                 :             :      S6  abs_diff = ABS_EXPR <diff>;
    1308                 :             :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1309                 :             :      S8  sum_1 = abs_diff + sum_0;
    1310                 :             : 
    1311                 :             :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1312                 :             :    same size as 'TYPE1' or bigger. This is a special case of a reduction
    1313                 :             :    computation.
    1314                 :             : 
    1315                 :             :    Input:
    1316                 :             : 
    1317                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1318                 :             :    example, when this function is called with S8, the pattern
    1319                 :             :    {S3,S4,S5,S6,S7,S8} will be detected.
    1320                 :             : 
    1321                 :             :    Output:
    1322                 :             : 
    1323                 :             :    * TYPE_OUT: The type of the output of this pattern.
    1324                 :             : 
    1325                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1326                 :             :    stmts that constitute the pattern. In this case it will be:
    1327                 :             :         SAD_EXPR <x_t, y_t, sum_0>
    1328                 :             :   */
    1329                 :             : 
    1330                 :             : static gimple *
    1331                 :    27444049 : vect_recog_sad_pattern (vec_info *vinfo,
    1332                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1333                 :             : {
    1334                 :    27444049 :   gimple *last_stmt = stmt_vinfo->stmt;
    1335                 :    27444049 :   tree half_type;
    1336                 :             : 
    1337                 :             :   /* Look for the following pattern
    1338                 :             :           DX = (TYPE1) X;
    1339                 :             :           DY = (TYPE1) Y;
    1340                 :             :           DDIFF = DX - DY;
    1341                 :             :           DAD = ABS_EXPR <DDIFF>;
    1342                 :             :           [DAD = (TYPE2) DAD;  #optional]
    1343                 :             :           sum_1 = DAD + sum_0;
    1344                 :             :      In which
    1345                 :             :      - DX is at least double the size of X
    1346                 :             :      - DY is at least double the size of Y
    1347                 :             :      - DX, DY, DDIFF, DAD all have the same type
    1348                 :             :      - sum is the same size as DAD or bigger
    1349                 :             :      - sum has been recognized as a reduction variable.
    1350                 :             : 
    1351                 :             :      This is equivalent to:
    1352                 :             :        DDIFF = X w- Y;          #widen sub
    1353                 :             :        DAD = ABS_EXPR <DDIFF>;
    1354                 :             :        sum_1 = DAD w+ sum_0;    #widen summation
    1355                 :             :      or
    1356                 :             :        DDIFF = X w- Y;          #widen sub
    1357                 :             :        DAD = ABS_EXPR <DDIFF>;
    1358                 :             :        sum_1 = DAD + sum_0;     #summation
    1359                 :             :    */
    1360                 :             : 
    1361                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1362                 :             :      of the above pattern.  */
    1363                 :             : 
    1364                 :    27444049 :   tree plus_oprnd0, plus_oprnd1;
    1365                 :    27444049 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1366                 :             :                                        &plus_oprnd0, &plus_oprnd1))
    1367                 :             :     return NULL;
    1368                 :             : 
    1369                 :       32461 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1370                 :             : 
    1371                 :             :   /* Any non-truncating sequence of conversions is OK here, since
    1372                 :             :      with a successful match, the result of the ABS(U) is known to fit
    1373                 :             :      within the nonnegative range of the result type.  (It cannot be the
    1374                 :             :      negative of the minimum signed value due to the range of the widening
    1375                 :             :      MINUS_EXPR.)  */
    1376                 :       32461 :   vect_unpromoted_value unprom_abs;
    1377                 :       32461 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1378                 :             :                                                       &unprom_abs);
    1379                 :             : 
    1380                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1381                 :             :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1382                 :             :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1383                 :             :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1384                 :             : 
    1385                 :       32461 :   if (!plus_oprnd0)
    1386                 :             :     return NULL;
    1387                 :             : 
    1388                 :       24253 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1389                 :       24253 :   if (!abs_stmt_vinfo)
    1390                 :             :     return NULL;
    1391                 :             : 
    1392                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
    1393                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1394                 :       23544 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1395                 :       70632 :   vect_unpromoted_value unprom[2];
    1396                 :             : 
    1397                 :       23544 :   if (!abs_stmt)
    1398                 :             :     {
    1399                 :    27444102 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1400                 :         279 :       if (!abd_stmt
    1401                 :         279 :           || !gimple_call_internal_p (abd_stmt)
    1402                 :           0 :           || gimple_call_num_args (abd_stmt) != 2)
    1403                 :             :         return NULL;
    1404                 :             : 
    1405                 :           0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1406                 :           0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1407                 :             : 
    1408                 :           0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
    1409                 :             :         {
    1410                 :           0 :           if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
    1411                 :             :                                                      &unprom[0])
    1412                 :           0 :               || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
    1413                 :             :                                                         &unprom[1]))
    1414                 :           0 :             return NULL;
    1415                 :             :         }
    1416                 :           0 :       else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1417                 :             :         {
    1418                 :           0 :           unprom[0].op = abd_oprnd0;
    1419                 :           0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1420                 :           0 :           unprom[1].op = abd_oprnd1;
    1421                 :           0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1422                 :             :         }
    1423                 :             :       else
    1424                 :             :         return NULL;
    1425                 :             : 
    1426                 :           0 :       half_type = unprom[0].type;
    1427                 :             :     }
    1428                 :       23214 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1429                 :             :                                             unprom, NULL))
    1430                 :             :     return NULL;
    1431                 :             : 
    1432                 :         367 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1433                 :             : 
    1434                 :         367 :   tree half_vectype;
    1435                 :         367 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1436                 :             :                                         type_out, &half_vectype))
    1437                 :             :     return NULL;
    1438                 :             : 
    1439                 :             :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1440                 :         226 :   tree sad_oprnd[2];
    1441                 :         226 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1442                 :             :                        unprom, half_vectype);
    1443                 :             : 
    1444                 :         226 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1445                 :         226 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1446                 :             :                                               sad_oprnd[1], plus_oprnd1);
    1447                 :             : 
    1448                 :         226 :   return pattern_stmt;
    1449                 :             : }
    1450                 :             : 
    1451                 :             : /* Function vect_recog_abd_pattern
    1452                 :             : 
    1453                 :             :    Try to find the following ABsolute Difference (ABD) or
    1454                 :             :    widening ABD (WIDEN_ABD) pattern:
    1455                 :             : 
    1456                 :             :    TYPE1 x;
    1457                 :             :    TYPE2 y;
    1458                 :             :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1459                 :             :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1460                 :             :    TYPE3 diff = x_cast - y_cast;
    1461                 :             :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1462                 :             :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1463                 :             : 
    1464                 :             :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1465                 :             :    twice as wide as TYPE3.
    1466                 :             : 
    1467                 :             :    Input:
    1468                 :             : 
    1469                 :             :    * STMT_VINFO: The stmt from which the pattern search begins
    1470                 :             : 
    1471                 :             :    Output:
    1472                 :             : 
    1473                 :             :    * TYPE_OUT: The type of the output of this pattern
    1474                 :             : 
    1475                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1476                 :             :      stmts that constitute the pattern, principally:
    1477                 :             :         out = IFN_ABD (x, y)
    1478                 :             :         out = IFN_VEC_WIDEN_ABD (x, y)
    1479                 :             :  */
    1480                 :             : 
    1481                 :             : static gimple *
    1482                 :    27400617 : vect_recog_abd_pattern (vec_info *vinfo,
    1483                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1484                 :             : {
    1485                 :    46096280 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1486                 :    18695663 :   if (!last_stmt)
    1487                 :             :     return NULL;
    1488                 :             : 
    1489                 :    18695663 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1490                 :             : 
    1491                 :    56086989 :   vect_unpromoted_value unprom[2];
    1492                 :    18695663 :   gassign *diff_stmt = NULL;
    1493                 :    18695663 :   tree abd_in_type;
    1494                 :    18695663 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
    1495                 :             :                                        unprom, &diff_stmt))
    1496                 :             :     {
    1497                 :             :       /* We cannot try further without having a non-widening MINUS.  */
    1498                 :    18694777 :       if (!diff_stmt)
    1499                 :             :         return NULL;
    1500                 :             : 
    1501                 :        1052 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1502                 :        1052 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1503                 :        1052 :       abd_in_type = signed_type_for (out_type);
    1504                 :             :     }
    1505                 :             : 
    1506                 :        1938 :   tree abd_out_type = abd_in_type;
    1507                 :             : 
    1508                 :        1938 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1509                 :        1938 :   if (!vectype_in)
    1510                 :             :     return NULL;
    1511                 :             : 
    1512                 :        1672 :   internal_fn ifn = IFN_ABD;
    1513                 :        1672 :   tree vectype_out = vectype_in;
    1514                 :             : 
    1515                 :        1672 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1516                 :        1672 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1517                 :             :     {
    1518                 :         779 :       tree mid_type
    1519                 :         779 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1520                 :         779 :                                           TYPE_UNSIGNED (abd_in_type));
    1521                 :         779 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1522                 :             : 
    1523                 :         779 :       code_helper dummy_code;
    1524                 :         779 :       int dummy_int;
    1525                 :         779 :       auto_vec<tree> dummy_vec;
    1526                 :         779 :       if (mid_vectype
    1527                 :         779 :           && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
    1528                 :             :                                              stmt_vinfo, mid_vectype,
    1529                 :             :                                              vectype_in,
    1530                 :             :                                              &dummy_code, &dummy_code,
    1531                 :             :                                              &dummy_int, &dummy_vec))
    1532                 :             :         {
    1533                 :           0 :           ifn = IFN_VEC_WIDEN_ABD;
    1534                 :           0 :           abd_out_type = mid_type;
    1535                 :           0 :           vectype_out = mid_vectype;
    1536                 :             :         }
    1537                 :         779 :     }
    1538                 :             : 
    1539                 :         779 :   if (ifn == IFN_ABD
    1540                 :        1672 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1541                 :             :                                           OPTIMIZE_FOR_SPEED))
    1542                 :             :     return NULL;
    1543                 :             : 
    1544                 :           0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1545                 :             : 
    1546                 :           0 :   tree abd_oprnds[2];
    1547                 :           0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1548                 :             :                        abd_in_type, unprom, vectype_in);
    1549                 :             : 
    1550                 :           0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1551                 :             : 
    1552                 :           0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1553                 :           0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1554                 :             :                                                 abd_oprnds[0], abd_oprnds[1]);
    1555                 :           0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1556                 :           0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1557                 :             : 
    1558                 :           0 :   gimple *stmt = abd_stmt;
    1559                 :           0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1560                 :           0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1561                 :           0 :       && !TYPE_UNSIGNED (abd_out_type))
    1562                 :             :     {
    1563                 :           0 :       tree unsign = unsigned_type_for (abd_out_type);
    1564                 :           0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1565                 :           0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1566                 :             :     }
    1567                 :             : 
    1568                 :           0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1569                 :             : }
    1570                 :             : 
    1571                 :             : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1572                 :             :    so that it can be treated as though it had the form:
    1573                 :             : 
    1574                 :             :       A_TYPE a;
    1575                 :             :       B_TYPE b;
    1576                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1577                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1578                 :             :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1579                 :             :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1580                 :             :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1581                 :             : 
    1582                 :             :    Try to replace the pattern with:
    1583                 :             : 
    1584                 :             :       A_TYPE a;
    1585                 :             :       B_TYPE b;
    1586                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1587                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1588                 :             :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1589                 :             :     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
    1590                 :             : 
    1591                 :             :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1592                 :             : 
    1593                 :             :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1594                 :             :    name of the pattern being matched, for dump purposes.  */
    1595                 :             : 
    1596                 :             : static gimple *
    1597                 :   110173964 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1598                 :             :                              stmt_vec_info last_stmt_info, tree *type_out,
    1599                 :             :                              tree_code orig_code, code_helper wide_code,
    1600                 :             :                              bool shift_p, const char *name)
    1601                 :             : {
    1602                 :   110173964 :   gimple *last_stmt = last_stmt_info->stmt;
    1603                 :             : 
    1604                 :   330521892 :   vect_unpromoted_value unprom[2];
    1605                 :   110173964 :   tree half_type;
    1606                 :   110173964 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1607                 :             :                              shift_p, 2, unprom, &half_type))
    1608                 :             : 
    1609                 :             :     return NULL;
    1610                 :             : 
    1611                 :             :   /* Pattern detected.  */
    1612                 :      280630 :   vect_pattern_detected (name, last_stmt);
    1613                 :             : 
    1614                 :      280630 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1615                 :      280630 :   tree itype = type;
    1616                 :      280630 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1617                 :      280630 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1618                 :      197805 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1619                 :      197805 :                                             TYPE_UNSIGNED (half_type));
    1620                 :             : 
    1621                 :             :   /* Check target support  */
    1622                 :      280630 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1623                 :      280630 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1624                 :      280630 :   tree ctype = itype;
    1625                 :      280630 :   tree vecctype = vecitype;
    1626                 :      280630 :   if (orig_code == MINUS_EXPR
    1627                 :        7779 :       && TYPE_UNSIGNED (itype)
    1628                 :      284169 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1629                 :             :     {
    1630                 :             :       /* Subtraction is special, even if half_type is unsigned and no matter
    1631                 :             :          whether type is signed or unsigned, if type is wider than itype,
    1632                 :             :          we need to sign-extend from the widening operation result to the
    1633                 :             :          result type.
    1634                 :             :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1635                 :             :          itype unsigned short and type either int or unsigned int.
    1636                 :             :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1637                 :             :          (unsigned short) 0xffff, but for type int we want the result -1
    1638                 :             :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1639                 :         578 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
    1640                 :         578 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1641                 :             :     }
    1642                 :             : 
    1643                 :      280630 :   code_helper dummy_code;
    1644                 :      280630 :   int dummy_int;
    1645                 :      280630 :   auto_vec<tree> dummy_vec;
    1646                 :      280630 :   if (!vectype
    1647                 :      280630 :       || !vecitype
    1648                 :      221221 :       || !vecctype
    1649                 :      501851 :       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
    1650                 :             :                                           vecitype, vectype,
    1651                 :             :                                           &dummy_code, &dummy_code,
    1652                 :             :                                           &dummy_int, &dummy_vec))
    1653                 :      191297 :     return NULL;
    1654                 :             : 
    1655                 :       89333 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1656                 :       89333 :   if (!*type_out)
    1657                 :             :     return NULL;
    1658                 :             : 
    1659                 :       89333 :   tree oprnd[2];
    1660                 :       89333 :   vect_convert_inputs (vinfo, last_stmt_info,
    1661                 :             :                        2, oprnd, half_type, unprom, vectype);
    1662                 :             : 
    1663                 :       89333 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1664                 :       89333 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1665                 :             : 
    1666                 :       89333 :   if (vecctype != vecitype)
    1667                 :           0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1668                 :             :                                         pattern_stmt, vecitype);
    1669                 :             : 
    1670                 :       89333 :   return vect_convert_output (vinfo, last_stmt_info,
    1671                 :       89333 :                               type, pattern_stmt, vecctype);
    1672                 :      280630 : }
    1673                 :             : 
    1674                 :             : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1675                 :             :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    1676                 :             : 
    1677                 :             : static gimple *
    1678                 :    27465760 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1679                 :             :                                tree *type_out)
    1680                 :             : {
    1681                 :    27465760 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1682                 :             :                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
    1683                 :    27465760 :                                       "vect_recog_widen_mult_pattern");
    1684                 :             : }
    1685                 :             : 
    1686                 :             : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1687                 :             :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */
    1688                 :             : 
    1689                 :             : static gimple *
    1690                 :    27632087 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1691                 :             :                                tree *type_out)
    1692                 :             : {
    1693                 :    27632087 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1694                 :             :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
    1695                 :    27632087 :                                       false, "vect_recog_widen_plus_pattern");
    1696                 :             : }
    1697                 :             : 
    1698                 :             : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1699                 :             :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */
    1700                 :             : static gimple *
    1701                 :    27632087 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1702                 :             :                                tree *type_out)
    1703                 :             : {
    1704                 :    27632087 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1705                 :             :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
    1706                 :    27632087 :                                       false, "vect_recog_widen_minus_pattern");
    1707                 :             : }
    1708                 :             : 
    1709                 :             : /* Try to detect abd on widened inputs, converting IFN_ABD
    1710                 :             :    to IFN_VEC_WIDEN_ABD.  */
    1711                 :             : static gimple *
    1712                 :    27632087 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1713                 :             :                               tree *type_out)
    1714                 :             : {
    1715                 :    27632087 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1716                 :    25601599 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1717                 :             :     return NULL;
    1718                 :             : 
    1719                 :     2592850 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1720                 :             : 
    1721                 :     2592850 :   tree in_type = TREE_TYPE (last_rhs);
    1722                 :     2592850 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1723                 :     2592850 :   if (!INTEGRAL_TYPE_P (in_type)
    1724                 :     2322212 :       || !INTEGRAL_TYPE_P (out_type)
    1725                 :     2232119 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1726                 :     3118823 :       || !TYPE_UNSIGNED (in_type))
    1727                 :             :     return NULL;
    1728                 :             : 
    1729                 :      179921 :   vect_unpromoted_value unprom;
    1730                 :      179921 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1731                 :      179921 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1732                 :             :     return NULL;
    1733                 :             : 
    1734                 :      179348 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1735                 :      179348 :   if (!abd_pattern_vinfo)
    1736                 :             :     return NULL;
    1737                 :             : 
    1738                 :    27641218 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1739                 :        9131 :   if (!abd_stmt
    1740                 :        9131 :       || !gimple_call_internal_p (abd_stmt)
    1741                 :         250 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1742                 :             :     return NULL;
    1743                 :             : 
    1744                 :           0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1745                 :           0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
    1746                 :             : 
    1747                 :           0 :   code_helper dummy_code;
    1748                 :           0 :   int dummy_int;
    1749                 :           0 :   auto_vec<tree> dummy_vec;
    1750                 :           0 :   if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
    1751                 :             :                                        vectype_out, vectype_in,
    1752                 :             :                                        &dummy_code, &dummy_code,
    1753                 :             :                                        &dummy_int, &dummy_vec))
    1754                 :             :     return NULL;
    1755                 :             : 
    1756                 :           0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1757                 :             : 
    1758                 :           0 :   *type_out = vectype_out;
    1759                 :             : 
    1760                 :           0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1761                 :           0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1762                 :           0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1763                 :           0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1764                 :             :                                                       abd_oprnd0, abd_oprnd1);
    1765                 :           0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1766                 :           0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1767                 :           0 :   return widen_abd_stmt;
    1768                 :           0 : }
    1769                 :             : 
    1770                 :             : /* Function vect_recog_ctz_ffs_pattern
    1771                 :             : 
    1772                 :             :    Try to find the following pattern:
    1773                 :             : 
    1774                 :             :    TYPE1 A;
    1775                 :             :    TYPE1 B;
    1776                 :             : 
    1777                 :             :    B = __builtin_ctz{,l,ll} (A);
    1778                 :             : 
    1779                 :             :    or
    1780                 :             : 
    1781                 :             :    B = __builtin_ffs{,l,ll} (A);
    1782                 :             : 
    1783                 :             :    Input:
    1784                 :             : 
    1785                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1786                 :             :    here it starts with B = __builtin_* (A);
    1787                 :             : 
    1788                 :             :    Output:
    1789                 :             : 
    1790                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1791                 :             : 
    1792                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1793                 :             :    stmts that constitute the pattern, using clz or popcount builtins.  */
    1794                 :             : 
    1795                 :             : static gimple *
    1796                 :    27443934 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1797                 :             :                             tree *type_out)
    1798                 :             : {
    1799                 :    27443934 :   gimple *call_stmt = stmt_vinfo->stmt;
    1800                 :    27443934 :   gimple *pattern_stmt;
    1801                 :    27443934 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1802                 :    27443934 :   tree new_var;
    1803                 :    27443934 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
    1804                 :    27443934 :   bool defined_at_zero = true, defined_at_zero_new = false;
    1805                 :    27443934 :   int val = 0, val_new = 0, val_cmp = 0;
    1806                 :    27443934 :   int prec;
    1807                 :    27443934 :   int sub = 0, add = 0;
    1808                 :    27443934 :   location_t loc;
    1809                 :             : 
    1810                 :    27443934 :   if (!is_gimple_call (call_stmt))
    1811                 :             :     return NULL;
    1812                 :             : 
    1813                 :     3289908 :   if (gimple_call_num_args (call_stmt) != 1
    1814                 :     3289908 :       && gimple_call_num_args (call_stmt) != 2)
    1815                 :             :     return NULL;
    1816                 :             : 
    1817                 :     1787287 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1818                 :     1787287 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1819                 :     1787287 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1820                 :     1787287 :   if (!lhs_oprnd)
    1821                 :             :     return NULL;
    1822                 :      895562 :   lhs_type = TREE_TYPE (lhs_oprnd);
    1823                 :      895562 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1824                 :      305157 :       || !INTEGRAL_TYPE_P (rhs_type)
    1825                 :       45322 :       || !type_has_mode_precision_p (rhs_type)
    1826                 :      939448 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1827                 :      863046 :     return NULL;
    1828                 :             : 
    1829                 :       32516 :   switch (gimple_call_combined_fn (call_stmt))
    1830                 :             :     {
    1831                 :        1136 :     CASE_CFN_CTZ:
    1832                 :        1136 :       ifn = IFN_CTZ;
    1833                 :        1136 :       if (!gimple_call_internal_p (call_stmt)
    1834                 :        1136 :           || gimple_call_num_args (call_stmt) != 2)
    1835                 :             :         defined_at_zero = false;
    1836                 :             :       else
    1837                 :          48 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1838                 :             :       break;
    1839                 :             :     CASE_CFN_FFS:
    1840                 :             :       ifn = IFN_FFS;
    1841                 :             :       break;
    1842                 :             :     default:
    1843                 :             :       return NULL;
    1844                 :             :     }
    1845                 :             : 
    1846                 :        1296 :   prec = TYPE_PRECISION (rhs_type);
    1847                 :        1296 :   loc = gimple_location (call_stmt);
    1848                 :             : 
    1849                 :        1296 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1850                 :        1296 :   if (!vec_type)
    1851                 :             :     return NULL;
    1852                 :             : 
    1853                 :        1290 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1854                 :        1290 :   if (!vec_rhs_type)
    1855                 :             :     return NULL;
    1856                 :             : 
    1857                 :             :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1858                 :             :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1859                 :             :      popcount<vector_mode>2.  */
    1860                 :        1054 :   if (!vec_type
    1861                 :        1054 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1862                 :             :                                          OPTIMIZE_FOR_SPEED))
    1863                 :             :     return NULL;
    1864                 :             : 
    1865                 :        1054 :   if (ifn == IFN_FFS
    1866                 :        1054 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1867                 :             :                                          OPTIMIZE_FOR_SPEED))
    1868                 :             :     {
    1869                 :           0 :       ifnnew = IFN_CTZ;
    1870                 :           0 :       defined_at_zero_new
    1871                 :           0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1872                 :             :                                      val_new) == 2;
    1873                 :             :     }
    1874                 :        1054 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1875                 :             :                                            OPTIMIZE_FOR_SPEED))
    1876                 :             :     {
    1877                 :          88 :       ifnnew = IFN_CLZ;
    1878                 :          88 :       defined_at_zero_new
    1879                 :          88 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1880                 :             :                                      val_new) == 2;
    1881                 :             :     }
    1882                 :          88 :   if ((ifnnew == IFN_LAST
    1883                 :          88 :        || (defined_at_zero && !defined_at_zero_new))
    1884                 :         966 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1885                 :             :                                          OPTIMIZE_FOR_SPEED))
    1886                 :             :     {
    1887                 :             :       ifnnew = IFN_POPCOUNT;
    1888                 :             :       defined_at_zero_new = true;
    1889                 :             :       val_new = prec;
    1890                 :             :     }
    1891                 :        1018 :   if (ifnnew == IFN_LAST)
    1892                 :             :     return NULL;
    1893                 :             : 
    1894                 :         124 :   vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
    1895                 :             : 
    1896                 :         124 :   val_cmp = val_new;
    1897                 :         124 :   if ((ifnnew == IFN_CLZ
    1898                 :         124 :        && defined_at_zero
    1899                 :          60 :        && defined_at_zero_new
    1900                 :          60 :        && val == prec
    1901                 :          31 :        && val_new == prec)
    1902                 :          93 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1903                 :             :     {
    1904                 :             :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1905                 :             :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1906                 :             :       if (ifnnew == IFN_CLZ)
    1907                 :             :         sub = prec;
    1908                 :          56 :       val_cmp = prec;
    1909                 :             : 
    1910                 :          56 :       if (!TYPE_UNSIGNED (rhs_type))
    1911                 :             :         {
    1912                 :          12 :           rhs_type = unsigned_type_for (rhs_type);
    1913                 :          12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1914                 :          12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1915                 :          12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1916                 :          12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1917                 :             :                                   vec_rhs_type);
    1918                 :          12 :           rhs_oprnd = new_var;
    1919                 :             :         }
    1920                 :             : 
    1921                 :          56 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
    1922                 :          56 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1923                 :          56 :                                           build_int_cst (rhs_type, -1));
    1924                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1925                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1926                 :             : 
    1927                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1928                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1929                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1930                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1931                 :          56 :       rhs_oprnd = new_var;
    1932                 :             : 
    1933                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1934                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1935                 :             :                                           m1, rhs_oprnd);
    1936                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1937                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1938                 :          56 :       rhs_oprnd = new_var;
    1939                 :          56 :     }
    1940                 :          68 :   else if (ifnnew == IFN_CLZ)
    1941                 :             :     {
    1942                 :             :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1943                 :             :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1944                 :          57 :       sub = prec - (ifn == IFN_CTZ);
    1945                 :          57 :       val_cmp = sub - val_new;
    1946                 :             : 
    1947                 :          57 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1948                 :          57 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1949                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1950                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1951                 :             : 
    1952                 :          57 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1953                 :          57 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1954                 :             :                                           rhs_oprnd, neg);
    1955                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1956                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1957                 :          57 :       rhs_oprnd = new_var;
    1958                 :             :     }
    1959                 :          11 :   else if (ifnnew == IFN_POPCOUNT)
    1960                 :             :     {
    1961                 :             :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1962                 :             :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
    1963                 :          11 :       sub = prec + (ifn == IFN_FFS);
    1964                 :          11 :       val_cmp = sub;
    1965                 :             : 
    1966                 :          11 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1967                 :          11 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1968                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1969                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1970                 :             : 
    1971                 :          11 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1972                 :          11 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    1973                 :             :                                           rhs_oprnd, neg);
    1974                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1975                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1976                 :          11 :       rhs_oprnd = new_var;
    1977                 :             :     }
    1978                 :           0 :   else if (ifnnew == IFN_CTZ)
    1979                 :             :     {
    1980                 :             :       /* .FFS (X) = .CTZ (X) + 1.  */
    1981                 :           0 :       add = 1;
    1982                 :           0 :       val_cmp++;
    1983                 :             :     }
    1984                 :             : 
    1985                 :             :   /* Create B = .IFNNEW (A).  */
    1986                 :         124 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    1987                 :         124 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
    1988                 :          88 :     pattern_stmt
    1989                 :          88 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    1990                 :          88 :                                     build_int_cst (integer_type_node,
    1991                 :             :                                                    val_new));
    1992                 :             :   else
    1993                 :          36 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    1994                 :         124 :   gimple_call_set_lhs (pattern_stmt, new_var);
    1995                 :         124 :   gimple_set_location (pattern_stmt, loc);
    1996                 :         124 :   *type_out = vec_type;
    1997                 :             : 
    1998                 :         124 :   if (sub)
    1999                 :             :     {
    2000                 :          99 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2001                 :          99 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2002                 :          99 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2003                 :          99 :                                           build_int_cst (lhs_type, sub),
    2004                 :             :                                           new_var);
    2005                 :          99 :       gimple_set_location (pattern_stmt, loc);
    2006                 :          99 :       new_var = ret_var;
    2007                 :             :     }
    2008                 :          25 :   else if (add)
    2009                 :             :     {
    2010                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2011                 :           0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2012                 :           0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2013                 :           0 :                                           build_int_cst (lhs_type, add));
    2014                 :           0 :       gimple_set_location (pattern_stmt, loc);
    2015                 :           0 :       new_var = ret_var;
    2016                 :             :     }
    2017                 :             : 
    2018                 :         124 :   if (defined_at_zero
    2019                 :          88 :       && (!defined_at_zero_new || val != val_cmp))
    2020                 :             :     {
    2021                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2022                 :          11 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2023                 :          11 :       rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2024                 :          11 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2025                 :          11 :       tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2026                 :          11 :       pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
    2027                 :             :                                           build_zero_cst (rhs_type));
    2028                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    2029                 :             :                               truth_type_for (vec_type), rhs_type);
    2030                 :          11 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2031                 :             :                                           new_var,
    2032                 :          11 :                                           build_int_cst (lhs_type, val));
    2033                 :             :     }
    2034                 :             : 
    2035                 :         124 :   if (dump_enabled_p ())
    2036                 :          36 :     dump_printf_loc (MSG_NOTE, vect_location,
    2037                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2038                 :             : 
    2039                 :             :   return pattern_stmt;
    2040                 :             : }
    2041                 :             : 
    2042                 :             : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2043                 :             : 
    2044                 :             :    Try to find the following pattern:
    2045                 :             : 
    2046                 :             :    UTYPE1 A;
    2047                 :             :    TYPE1 B;
    2048                 :             :    UTYPE2 temp_in;
    2049                 :             :    TYPE3 temp_out;
    2050                 :             :    temp_in = (UTYPE2)A;
    2051                 :             : 
    2052                 :             :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2053                 :             :    B = (TYPE1) temp_out;
    2054                 :             : 
    2055                 :             :    UTYPE2 may or may not have the same precision as TYPE3,
    2056                 :             :    e.g. the precisions are equal for __builtin_popcount
    2057                 :             :    but are not equal for __builtin_popcountll.
    2058                 :             : 
    2059                 :             :    Input:
    2060                 :             : 
    2061                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2062                 :             :    here it starts with B = (TYPE1) temp_out;
    2063                 :             : 
    2064                 :             :    Output:
    2065                 :             : 
    2066                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2067                 :             : 
    2068                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2069                 :             :    stmts that constitute the pattern. In this case it will be:
    2070                 :             :    B = .POPCOUNT (A);
    2071                 :             : 
    2072                 :             :    Similarly for clz, ctz and ffs.
    2073                 :             : */
    2074                 :             : 
    2075                 :             : static gimple *
    2076                 :    27443814 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2077                 :             :                                          stmt_vec_info stmt_vinfo,
    2078                 :             :                                          tree *type_out)
    2079                 :             : {
    2080                 :    27443814 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2081                 :    18738680 :   gimple *call_stmt, *pattern_stmt;
    2082                 :    18738680 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2083                 :    46182335 :   internal_fn ifn = IFN_LAST;
    2084                 :    27443655 :   int addend = 0;
    2085                 :             : 
    2086                 :             :   /* Find B = (TYPE1) temp_out. */
    2087                 :    18738680 :   if (!last_stmt)
    2088                 :             :     return NULL;
    2089                 :    18738680 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2090                 :    18738680 :   if (!CONVERT_EXPR_CODE_P (code))
    2091                 :             :     return NULL;
    2092                 :             : 
    2093                 :     2558680 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2094                 :     2558680 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2095                 :     2558680 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2096                 :             :     return NULL;
    2097                 :             : 
    2098                 :     2408339 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2099                 :     2408339 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2100                 :     2408339 :       || !has_single_use (rhs_oprnd))
    2101                 :             :     return NULL;
    2102                 :     1259806 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2103                 :             : 
    2104                 :             :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
    2105                 :     1259806 :   if (!is_gimple_call (call_stmt))
    2106                 :             :     return NULL;
    2107                 :       95998 :   switch (gimple_call_combined_fn (call_stmt))
    2108                 :             :     {
    2109                 :             :       int val;
    2110                 :             :     CASE_CFN_POPCOUNT:
    2111                 :             :       ifn = IFN_POPCOUNT;
    2112                 :             :       break;
    2113                 :        2258 :     CASE_CFN_CLZ:
    2114                 :        2258 :       ifn = IFN_CLZ;
    2115                 :             :       /* Punt if call result is unsigned and defined value at zero
    2116                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2117                 :        2258 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2118                 :           0 :           && gimple_call_internal_p (call_stmt)
    2119                 :        2258 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2120                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2121                 :        2258 :           && val < 0)
    2122                 :             :         return NULL;
    2123                 :             :       break;
    2124                 :         578 :     CASE_CFN_CTZ:
    2125                 :         578 :       ifn = IFN_CTZ;
    2126                 :             :       /* Punt if call result is unsigned and defined value at zero
    2127                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2128                 :         578 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2129                 :           0 :           && gimple_call_internal_p (call_stmt)
    2130                 :         578 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2131                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2132                 :         578 :           && val < 0)
    2133                 :             :         return NULL;
    2134                 :             :       break;
    2135                 :          18 :     CASE_CFN_FFS:
    2136                 :          18 :       ifn = IFN_FFS;
    2137                 :          18 :       break;
    2138                 :             :     default:
    2139                 :             :       return NULL;
    2140                 :             :     }
    2141                 :             : 
    2142                 :        3110 :   if (gimple_call_num_args (call_stmt) != 1
    2143                 :        3110 :       && gimple_call_num_args (call_stmt) != 2)
    2144                 :             :     return NULL;
    2145                 :             : 
    2146                 :        3110 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2147                 :        3110 :   vect_unpromoted_value unprom_diff;
    2148                 :        3110 :   rhs_origin
    2149                 :        3110 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2150                 :             : 
    2151                 :        3110 :   if (!rhs_origin)
    2152                 :             :     return NULL;
    2153                 :             : 
    2154                 :             :   /* Input and output of .POPCOUNT should be same-precision integers.  */
    2155                 :        3110 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2156                 :             :     return NULL;
    2157                 :             : 
    2158                 :             :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2159                 :             :      different builtins/internal functions have different behaviors.  */
    2160                 :        1298 :   if (TYPE_PRECISION (unprom_diff.type)
    2161                 :        1298 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2162                 :         170 :     switch (ifn)
    2163                 :             :       {
    2164                 :          79 :       case IFN_POPCOUNT:
    2165                 :             :         /* For popcount require zero extension, which doesn't add any
    2166                 :             :            further bits to the count.  */
    2167                 :          79 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2168                 :             :           return NULL;
    2169                 :             :         break;
    2170                 :          73 :       case IFN_CLZ:
    2171                 :             :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2172                 :             :            if it is undefined at zero or if it matches also for the
    2173                 :             :            defined value there.  */
    2174                 :          73 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2175                 :             :           return NULL;
    2176                 :          73 :         if (!type_has_mode_precision_p (lhs_type)
    2177                 :          73 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2178                 :           0 :           return NULL;
    2179                 :          73 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2180                 :          73 :                   - TYPE_PRECISION (lhs_type));
    2181                 :          73 :         if (gimple_call_internal_p (call_stmt)
    2182                 :          73 :             && gimple_call_num_args (call_stmt) == 2)
    2183                 :             :           {
    2184                 :           0 :             int val1, val2;
    2185                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2186                 :           0 :             int d2
    2187                 :           0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2188                 :             :                                            val2);
    2189                 :           0 :             if (d2 != 2 || val1 != val2 + addend)
    2190                 :             :               return NULL;
    2191                 :             :           }
    2192                 :             :         break;
    2193                 :          13 :       case IFN_CTZ:
    2194                 :             :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2195                 :             :            if it is undefined at zero or if it matches also for the
    2196                 :             :            defined value there.  */
    2197                 :          13 :         if (gimple_call_internal_p (call_stmt)
    2198                 :          13 :             && gimple_call_num_args (call_stmt) == 2)
    2199                 :             :           {
    2200                 :           0 :             int val1, val2;
    2201                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2202                 :           0 :             int d2
    2203                 :           0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2204                 :             :                                            val2);
    2205                 :           0 :             if (d2 != 2 || val1 != val2)
    2206                 :             :               return NULL;
    2207                 :             :           }
    2208                 :             :         break;
    2209                 :             :       case IFN_FFS:
    2210                 :             :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2211                 :             :         break;
    2212                 :           0 :       default:
    2213                 :           0 :         gcc_unreachable ();
    2214                 :             :       }
    2215                 :             : 
    2216                 :        1298 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2217                 :             :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2218                 :        1298 :   if (!vec_type)
    2219                 :             :     return NULL;
    2220                 :             : 
    2221                 :        1160 :   bool supported
    2222                 :        1160 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2223                 :        1160 :   if (!supported)
    2224                 :        1057 :     switch (ifn)
    2225                 :             :       {
    2226                 :             :       case IFN_POPCOUNT:
    2227                 :             :       case IFN_CLZ:
    2228                 :             :         return NULL;
    2229                 :          18 :       case IFN_FFS:
    2230                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2231                 :          18 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2232                 :             :                                             OPTIMIZE_FOR_SPEED))
    2233                 :             :           break;
    2234                 :             :         /* FALLTHRU */
    2235                 :         365 :       case IFN_CTZ:
    2236                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2237                 :             :            clz or popcount.  */
    2238                 :         365 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2239                 :             :                                             OPTIMIZE_FOR_SPEED))
    2240                 :             :           break;
    2241                 :         331 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2242                 :             :                                             OPTIMIZE_FOR_SPEED))
    2243                 :             :           break;
    2244                 :             :         return NULL;
    2245                 :           0 :       default:
    2246                 :           0 :         gcc_unreachable ();
    2247                 :             :       }
    2248                 :             : 
    2249                 :         159 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2250                 :             :                          call_stmt);
    2251                 :             : 
    2252                 :             :   /* Create B = .POPCOUNT (A).  */
    2253                 :         159 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2254                 :         159 :   tree arg2 = NULL_TREE;
    2255                 :         159 :   int val;
    2256                 :         159 :   if (ifn == IFN_CLZ
    2257                 :         191 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2258                 :             :                                     val) == 2)
    2259                 :          30 :     arg2 = build_int_cst (integer_type_node, val);
    2260                 :         129 :   else if (ifn == IFN_CTZ
    2261                 :         167 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2262                 :             :                                          val) == 2)
    2263                 :          38 :     arg2 = build_int_cst (integer_type_node, val);
    2264                 :         159 :   if (arg2)
    2265                 :          68 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2266                 :             :   else
    2267                 :          91 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2268                 :         159 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2269                 :         159 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2270                 :         159 :   *type_out = vec_type;
    2271                 :             : 
    2272                 :         159 :   if (dump_enabled_p ())
    2273                 :          24 :     dump_printf_loc (MSG_NOTE, vect_location,
    2274                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2275                 :             : 
    2276                 :         159 :   if (addend)
    2277                 :             :     {
    2278                 :           6 :       gcc_assert (supported);
    2279                 :           6 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2280                 :           6 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2281                 :           6 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2282                 :           6 :                                           build_int_cst (lhs_type, addend));
    2283                 :             :     }
    2284                 :         153 :   else if (!supported)
    2285                 :             :     {
    2286                 :          56 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2287                 :          56 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2288                 :          56 :       pattern_stmt
    2289                 :          56 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2290                 :          56 :       if (pattern_stmt == NULL)
    2291                 :             :         return NULL;
    2292                 :          56 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2293                 :             :         {
    2294                 :          56 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2295                 :          56 :           gimple_seq_add_seq_without_update (pseq, seq);
    2296                 :             :         }
    2297                 :             :     }
    2298                 :             :   return pattern_stmt;
    2299                 :             : }
    2300                 :             : 
    2301                 :             : /* Function vect_recog_pow_pattern
    2302                 :             : 
    2303                 :             :    Try to find the following pattern:
    2304                 :             : 
    2305                 :             :      x = POW (y, N);
    2306                 :             : 
    2307                 :             :    with POW being one of pow, powf, powi, powif and N being
    2308                 :             :    either 2 or 0.5.
    2309                 :             : 
    2310                 :             :    Input:
    2311                 :             : 
    2312                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2313                 :             : 
    2314                 :             :    Output:
    2315                 :             : 
    2316                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2317                 :             : 
    2318                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2319                 :             :    stmts that constitute the pattern. In this case it will be:
    2320                 :             :         x = y * y
    2321                 :             :    or
    2322                 :             :         x = sqrt (y)
    2323                 :             : */
    2324                 :             : 
    2325                 :             : static gimple *
    2326                 :    27443823 : vect_recog_pow_pattern (vec_info *vinfo,
    2327                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2328                 :             : {
    2329                 :    27443823 :   gimple *last_stmt = stmt_vinfo->stmt;
    2330                 :    27443823 :   tree base, exp;
    2331                 :    27443823 :   gimple *stmt;
    2332                 :    27443823 :   tree var;
    2333                 :             : 
    2334                 :    27443823 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2335                 :             :     return NULL;
    2336                 :             : 
    2337                 :     1415203 :   switch (gimple_call_combined_fn (last_stmt))
    2338                 :             :     {
    2339                 :         274 :     CASE_CFN_POW:
    2340                 :         274 :     CASE_CFN_POWI:
    2341                 :         274 :       break;
    2342                 :             : 
    2343                 :             :     default:
    2344                 :             :       return NULL;
    2345                 :             :     }
    2346                 :             : 
    2347                 :         274 :   base = gimple_call_arg (last_stmt, 0);
    2348                 :         274 :   exp = gimple_call_arg (last_stmt, 1);
    2349                 :         274 :   if (TREE_CODE (exp) != REAL_CST
    2350                 :         248 :       && TREE_CODE (exp) != INTEGER_CST)
    2351                 :             :     {
    2352                 :         248 :       if (flag_unsafe_math_optimizations
    2353                 :          26 :           && TREE_CODE (base) == REAL_CST
    2354                 :         250 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2355                 :             :         {
    2356                 :           2 :           combined_fn log_cfn;
    2357                 :           2 :           built_in_function exp_bfn;
    2358                 :           2 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2359                 :             :             {
    2360                 :             :             case BUILT_IN_POW:
    2361                 :             :               log_cfn = CFN_BUILT_IN_LOG;
    2362                 :             :               exp_bfn = BUILT_IN_EXP;
    2363                 :             :               break;
    2364                 :           0 :             case BUILT_IN_POWF:
    2365                 :           0 :               log_cfn = CFN_BUILT_IN_LOGF;
    2366                 :           0 :               exp_bfn = BUILT_IN_EXPF;
    2367                 :           0 :               break;
    2368                 :           0 :             case BUILT_IN_POWL:
    2369                 :           0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2370                 :           0 :               exp_bfn = BUILT_IN_EXPL;
    2371                 :           0 :               break;
    2372                 :             :             default:
    2373                 :             :               return NULL;
    2374                 :             :             }
    2375                 :           2 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2376                 :           2 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2377                 :             :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2378                 :             :              does that, but if C is a power of 2, we want to use
    2379                 :             :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2380                 :             :              vectorization we don't have vectorized exp2.  */
    2381                 :           2 :           if (logc
    2382                 :           2 :               && TREE_CODE (logc) == REAL_CST
    2383                 :           2 :               && exp_decl
    2384                 :           4 :               && lookup_attribute ("omp declare simd",
    2385                 :           2 :                                    DECL_ATTRIBUTES (exp_decl)))
    2386                 :             :             {
    2387                 :           2 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2388                 :           2 :               if (node->simd_clones == NULL)
    2389                 :             :                 {
    2390                 :           2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2391                 :           2 :                       || node->definition)
    2392                 :             :                     return NULL;
    2393                 :           2 :                   expand_simd_clones (node);
    2394                 :           2 :                   if (node->simd_clones == NULL)
    2395                 :             :                     return NULL;
    2396                 :             :                 }
    2397                 :           2 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2398                 :           2 :               if (!*type_out)
    2399                 :             :                 return NULL;
    2400                 :           2 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2401                 :           2 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
    2402                 :           2 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2403                 :           2 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2404                 :           2 :               g = gimple_build_call (exp_decl, 1, def);
    2405                 :           2 :               gimple_call_set_lhs (g, res);
    2406                 :           2 :               return g;
    2407                 :             :             }
    2408                 :             :         }
    2409                 :             : 
    2410                 :         246 :       return NULL;
    2411                 :             :     }
    2412                 :             : 
    2413                 :             :   /* We now have a pow or powi builtin function call with a constant
    2414                 :             :      exponent.  */
    2415                 :             : 
    2416                 :             :   /* Catch squaring.  */
    2417                 :          26 :   if ((tree_fits_shwi_p (exp)
    2418                 :           0 :        && tree_to_shwi (exp) == 2)
    2419                 :          26 :       || (TREE_CODE (exp) == REAL_CST
    2420                 :          26 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2421                 :             :     {
    2422                 :           0 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2423                 :           0 :                                             TREE_TYPE (base), type_out))
    2424                 :             :         return NULL;
    2425                 :             : 
    2426                 :           0 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2427                 :           0 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2428                 :           0 :       return stmt;
    2429                 :             :     }
    2430                 :             : 
    2431                 :             :   /* Catch square root.  */
    2432                 :          26 :   if (TREE_CODE (exp) == REAL_CST
    2433                 :          26 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2434                 :             :     {
    2435                 :          11 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2436                 :          11 :       if (*type_out
    2437                 :          11 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2438                 :             :                                              OPTIMIZE_FOR_SPEED))
    2439                 :             :         {
    2440                 :           9 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2441                 :           9 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2442                 :           9 :           gimple_call_set_lhs (stmt, var);
    2443                 :           9 :           gimple_call_set_nothrow (stmt, true);
    2444                 :           9 :           return stmt;
    2445                 :             :         }
    2446                 :             :     }
    2447                 :             : 
    2448                 :             :   return NULL;
    2449                 :             : }
    2450                 :             : 
    2451                 :             : 
    2452                 :             : /* Function vect_recog_widen_sum_pattern
    2453                 :             : 
    2454                 :             :    Try to find the following pattern:
    2455                 :             : 
    2456                 :             :      type x_t;
    2457                 :             :      TYPE x_T, sum = init;
    2458                 :             :    loop:
    2459                 :             :      sum_0 = phi <init, sum_1>
    2460                 :             :      S1  x_t = *p;
    2461                 :             :      S2  x_T = (TYPE) x_t;
    2462                 :             :      S3  sum_1 = x_T + sum_0;
    2463                 :             : 
    2464                 :             :    where type 'TYPE' is at least double the size of type 'type', i.e. we're
    2465                 :             :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2466                 :             :    a special case of a reduction computation.
    2467                 :             : 
    2468                 :             :    Input:
    2469                 :             : 
    2470                 :             :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2471                 :             :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2472                 :             : 
    2473                 :             :    Output:
    2474                 :             : 
    2475                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2476                 :             : 
    2477                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2478                 :             :    stmts that constitute the pattern. In this case it will be:
    2479                 :             :         WIDEN_SUM <x_t, sum_0>
    2480                 :             : 
    2481                 :             :    Note: The widening-sum idiom is a widening reduction pattern that is
    2482                 :             :          vectorized without preserving all the intermediate results. It
    2483                 :             :          produces only N/2 (widened) results (by summing up pairs of
    2484                 :             :          intermediate results) rather than all N results.  Therefore, we
    2485                 :             :          cannot allow this pattern when we want to get all the results and in
    2486                 :             :          the correct order (as is the case when this computation is in an
    2487                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    2488                 :             : 
    2489                 :             : static gimple *
    2490                 :    27443823 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2491                 :             :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2492                 :             : {
    2493                 :    27443823 :   gimple *last_stmt = stmt_vinfo->stmt;
    2494                 :    27443823 :   tree oprnd0, oprnd1;
    2495                 :    27443823 :   tree type;
    2496                 :    27443823 :   gimple *pattern_stmt;
    2497                 :    27443823 :   tree var;
    2498                 :             : 
    2499                 :             :   /* Look for the following pattern
    2500                 :             :           DX = (TYPE) X;
    2501                 :             :           sum_1 = DX + sum_0;
    2502                 :             :      In which DX is at least double the size of X, and sum_1 has been
    2503                 :             :      recognized as a reduction variable.
    2504                 :             :    */
    2505                 :             : 
    2506                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2507                 :             :      of the above pattern.  */
    2508                 :             : 
    2509                 :    27443823 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2510                 :             :                                        &oprnd0, &oprnd1)
    2511                 :       32235 :       || TREE_CODE (oprnd0) != SSA_NAME
    2512                 :    27475881 :       || !vinfo->lookup_def (oprnd0))
    2513                 :    27411823 :     return NULL;
    2514                 :             : 
    2515                 :       32000 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2516                 :             : 
    2517                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2518                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2519                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2520                 :             :      Left to check that oprnd0 is defined by a cast from type 'type' to type
    2521                 :             :      'TYPE'.  */
    2522                 :             : 
    2523                 :       32000 :   vect_unpromoted_value unprom0;
    2524                 :       32000 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2525                 :       32000 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    2526                 :             :     return NULL;
    2527                 :             : 
    2528                 :        1692 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2529                 :             : 
    2530                 :        1692 :   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2531                 :             :                                         unprom0.type, type_out))
    2532                 :             :     return NULL;
    2533                 :             : 
    2534                 :           0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2535                 :           0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    2536                 :             : 
    2537                 :           0 :   return pattern_stmt;
    2538                 :             : }
    2539                 :             : 
    2540                 :             : /* Function vect_recog_bitfield_ref_pattern
    2541                 :             : 
    2542                 :             :    Try to find the following pattern:
    2543                 :             : 
    2544                 :             :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2545                 :             :    result = (type_out) bf_value;
    2546                 :             : 
    2547                 :             :    or
    2548                 :             : 
    2549                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2550                 :             : 
    2551                 :             :    where type_out is a non-bitfield type, that is to say, its precision matches
    2552                 :             :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2553                 :             : 
    2554                 :             :    Input:
    2555                 :             : 
    2556                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2557                 :             :    here it starts with:
    2558                 :             :    result = (type_out) bf_value;
    2559                 :             : 
    2560                 :             :    or
    2561                 :             : 
    2562                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2563                 :             : 
    2564                 :             :    Output:
    2565                 :             : 
    2566                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2567                 :             : 
    2568                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2569                 :             :    stmts that constitute the pattern. If the precision of type_out is bigger
    2570                 :             :    than that of the container type we perform the widening before the shifting,
    2571                 :             :    since the new precision will be large enough to shift the value and moving
    2572                 :             :    widening operations up the statement chain enables the generation of
    2573                 :             :    widening loads.  If we are widening and the operation after the pattern is
    2574                 :             :    an addition then we mask first and shift later, to enable the generation of
    2575                 :             :    shifting adds.  In the case of narrowing we will always mask first, shift
    2576                 :             :    last and then perform a narrowing operation.  This will enable the
    2577                 :             :    generation of narrowing shifts.
    2578                 :             : 
    2579                 :             :    Widening with mask first, shift later:
    2580                 :             :    container = (type_out) container;
    2581                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2582                 :             :    result = masked >> bitpos;
    2583                 :             : 
    2584                 :             :    Widening with shift first, mask last:
    2585                 :             :    container = (type_out) container;
    2586                 :             :    shifted = container >> bitpos;
    2587                 :             :    result = shifted & ((1 << bitsize) - 1);
    2588                 :             : 
    2589                 :             :    Narrowing:
    2590                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2591                 :             :    result = masked >> bitpos;
    2592                 :             :    result = (type_out) result;
    2593                 :             : 
    2594                 :             :    If the bitfield is signed and it's wider than type_out, we need to
    2595                 :             :    keep the result sign-extended:
    2596                 :             :    container = (type) container;
    2597                 :             :    masked = container << (prec - bitsize - bitpos);
    2598                 :             :    result = (type_out) (masked >> (prec - bitsize));
    2599                 :             : 
    2600                 :             :    Here type is the signed variant of the wider of type_out and the type
    2601                 :             :    of container.
    2602                 :             : 
    2603                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2604                 :             : 
    2605                 :             :    When the original bitfield was inside a gcond then a new gcond is also
    2606                 :             :    generated with the new `result` as the operand to the comparison.
    2607                 :             : 
    2608                 :             : */
    2609                 :             : 
    2610                 :             : static gimple *
    2611                 :    27397249 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2612                 :             :                                  tree *type_out)
    2613                 :             : {
    2614                 :    27397249 :   gimple *bf_stmt = NULL;
    2615                 :    27397249 :   tree lhs = NULL_TREE;
    2616                 :    27397249 :   tree ret_type = NULL_TREE;
    2617                 :    27397249 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2618                 :    27397249 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2619                 :             :     {
    2620                 :     4513539 :       tree op = gimple_cond_lhs (cond_stmt);
    2621                 :     4513539 :       if (TREE_CODE (op) != SSA_NAME)
    2622                 :             :         return NULL;
    2623                 :     4443878 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2624                 :     4443878 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2625                 :             :         return NULL;
    2626                 :             :     }
    2627                 :    22883710 :   else if (is_gimple_assign (stmt)
    2628                 :    18691700 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2629                 :    25377877 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2630                 :             :     {
    2631                 :     2452377 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2632                 :     2452377 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2633                 :     2452377 :       lhs = gimple_assign_lhs (stmt);
    2634                 :     2452377 :       ret_type = TREE_TYPE (lhs);
    2635                 :             :     }
    2636                 :             : 
    2637                 :     5447591 :   if (!bf_stmt
    2638                 :     5447591 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2639                 :             :     return NULL;
    2640                 :             : 
    2641                 :       13862 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2642                 :       13862 :   tree container = TREE_OPERAND (bf_ref, 0);
    2643                 :       13862 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);
    2644                 :             : 
    2645                 :       13862 :   if (!bit_field_offset (bf_ref).is_constant ()
    2646                 :       13862 :       || !bit_field_size (bf_ref).is_constant ()
    2647                 :       13862 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2648                 :           0 :     return NULL;
    2649                 :             : 
    2650                 :       27346 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2651                 :       13860 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2652                 :       15837 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2653                 :       11887 :     return NULL;
    2654                 :             : 
    2655                 :        1975 :   gimple *use_stmt, *pattern_stmt;
    2656                 :        1975 :   use_operand_p use_p;
    2657                 :        1975 :   bool shift_first = true;
    2658                 :        1975 :   tree container_type = TREE_TYPE (container);
    2659                 :        1975 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2660                 :             : 
    2661                 :             :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2662                 :             :      the container may change and we have to consider offset change for
    2663                 :             :      widening loads on big endianness.  The shift_n calculated here can be
    2664                 :             :      independent of widening.  */
    2665                 :        1975 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2666                 :        1975 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2667                 :        1975 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2668                 :        1975 :   if (BYTES_BIG_ENDIAN)
    2669                 :             :     shift_n = prec - shift_n - mask_width;
    2670                 :             : 
    2671                 :        1975 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2672                 :        1353 :                    TYPE_PRECISION (ret_type) > mask_width);
    2673                 :        1975 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2674                 :        1975 :                      TYPE_PRECISION (ret_type));
    2675                 :             : 
    2676                 :             :   /* We move the conversion earlier if the loaded type is smaller than the
    2677                 :             :      return type to enable the use of widening loads.  And if we need a
    2678                 :             :      sign extension, we need to convert the loaded value early to a signed
    2679                 :             :      type as well.  */
    2680                 :        1975 :   if (ref_sext || load_widen)
    2681                 :             :     {
    2682                 :         919 :       tree type = load_widen ? ret_type : container_type;
    2683                 :         919 :       if (ref_sext)
    2684                 :         891 :         type = gimple_signed_type (type);
    2685                 :         919 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2686                 :             :                                           NOP_EXPR, container);
    2687                 :         919 :       container = gimple_get_lhs (pattern_stmt);
    2688                 :         919 :       container_type = TREE_TYPE (container);
    2689                 :         919 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2690                 :         919 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2691                 :         919 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2692                 :             :     }
    2693                 :        1056 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2694                 :             :     /* If we are doing the conversion last then also delay the shift as we may
    2695                 :             :        be able to combine the shift and conversion in certain cases.  */
    2696                 :             :     shift_first = false;
    2697                 :             : 
    2698                 :             :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2699                 :             :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2700                 :             :      add into a single instruction.  */
    2701                 :        1272 :   if (lhs && !is_pattern_stmt_p (stmt_info)
    2702                 :        3247 :       && single_imm_use (lhs, &use_p, &use_stmt))
    2703                 :             :     {
    2704                 :         917 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2705                 :         917 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2706                 :             :         shift_first = false;
    2707                 :             :     }
    2708                 :             : 
    2709                 :             :   /* If we don't have to shift we only generate the mask, so just fix the
    2710                 :             :      code-path to shift_first.  */
    2711                 :        1975 :   if (shift_n == 0)
    2712                 :         687 :     shift_first = true;
    2713                 :             : 
    2714                 :        1975 :   tree result;
    2715                 :        1975 :   if (shift_first && !ref_sext)
    2716                 :             :     {
    2717                 :         426 :       tree shifted = container;
    2718                 :         426 :       if (shift_n)
    2719                 :             :         {
    2720                 :          53 :           pattern_stmt
    2721                 :          53 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2722                 :             :                                    RSHIFT_EXPR, container,
    2723                 :          53 :                                    build_int_cst (sizetype, shift_n));
    2724                 :          53 :           shifted = gimple_assign_lhs (pattern_stmt);
    2725                 :          53 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2726                 :             :         }
    2727                 :             : 
    2728                 :         426 :       tree mask = wide_int_to_tree (container_type,
    2729                 :         426 :                                     wi::mask (mask_width, false, prec));
    2730                 :             : 
    2731                 :         426 :       pattern_stmt
    2732                 :         426 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2733                 :             :                                BIT_AND_EXPR, shifted, mask);
    2734                 :         426 :       result = gimple_assign_lhs (pattern_stmt);
    2735                 :             :     }
    2736                 :             :   else
    2737                 :             :     {
    2738                 :        1549 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2739                 :        1549 :       if (!ref_sext)
    2740                 :             :         {
    2741                 :         658 :           tree mask = wide_int_to_tree (container_type,
    2742                 :         658 :                                         wi::shifted_mask (shift_n,
    2743                 :             :                                                           mask_width,
    2744                 :             :                                                           false, prec));
    2745                 :         658 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2746                 :             :                                               container, mask);
    2747                 :             :         }
    2748                 :             :       else
    2749                 :             :         {
    2750                 :         891 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;
    2751                 :         891 :           shift_n += shl;
    2752                 :         891 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2753                 :             :                                               container,
    2754                 :         891 :                                               build_int_cst (sizetype,
    2755                 :             :                                                              shl));
    2756                 :             :         }
    2757                 :             : 
    2758                 :        1549 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2759                 :        1549 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2760                 :        1549 :       pattern_stmt
    2761                 :        1549 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2762                 :             :                                RSHIFT_EXPR, masked,
    2763                 :        1549 :                                build_int_cst (sizetype, shift_n));
    2764                 :        1549 :       result = gimple_assign_lhs (pattern_stmt);
    2765                 :             :     }
    2766                 :             : 
    2767                 :        1975 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2768                 :             :     {
    2769                 :        1293 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2770                 :        1293 :       pattern_stmt
    2771                 :        1293 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2772                 :             :                                NOP_EXPR, result);
    2773                 :             :     }
    2774                 :             : 
    2775                 :        1975 :   if (!lhs)
    2776                 :             :     {
    2777                 :         703 :       if (!vectype)
    2778                 :             :         return NULL;
    2779                 :             : 
    2780                 :         595 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2781                 :         595 :       vectype = truth_type_for (vectype);
    2782                 :             : 
    2783                 :             :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2784                 :             :                 same way as vect_recog_gcond_pattern does.  However because
    2785                 :             :                 patterns cannot match the same root twice,  when we handle and
    2786                 :             :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2787                 :             :                 apply anymore.  We should really fix it so that we don't need to
    2788                 :             :                 duplicate transformations like these.  */
    2789                 :         595 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2790                 :         595 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2791                 :         595 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2792                 :         595 :       gimple *new_stmt
    2793                 :         595 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2794                 :             :                                gimple_get_lhs (pattern_stmt),
    2795                 :             :                                fold_convert (container_type, cond_cst));
    2796                 :         595 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2797                 :         595 :       pattern_stmt
    2798                 :         595 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2799                 :         595 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2800                 :             :                              NULL_TREE, NULL_TREE);
    2801                 :             :     }
    2802                 :             : 
    2803                 :        1867 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2804                 :        1867 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2805                 :             : 
    2806                 :        1867 :   return pattern_stmt;
    2807                 :             : }
    2808                 :             : 
    2809                 :             : /* Function vect_recog_bit_insert_pattern
    2810                 :             : 
    2811                 :             :    Try to find the following pattern:
    2812                 :             : 
    2813                 :             :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2814                 :             : 
    2815                 :             :    Input:
    2816                 :             : 
    2817                 :             :    * STMT_VINFO: The stmt we want to replace.
    2818                 :             : 
    2819                 :             :    Output:
    2820                 :             : 
    2821                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2822                 :             : 
    2823                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2824                 :             :    stmts that constitute the pattern. In this case it will be:
    2825                 :             :    value = (container_type) value;          // Make sure the types match
    2826                 :             :    shifted = value << bitpos;                 // Shift value into place
    2827                 :             :    masked = shifted & (mask << bitpos);           // Mask off the non-relevant bits in
    2828                 :             :                                             // the 'to-write value'.
    2829                 :             :    cleared = container & ~(mask << bitpos); // Clearing the bits we want to
    2830                 :             :                                             // write to from the value we want
    2831                 :             :                                             // to write to.
    2832                 :             :    written = cleared | masked;              // Write bits.
    2833                 :             : 
    2834                 :             : 
    2835                 :             :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2836                 :             :    bits corresponding to the real size of the bitfield value we are writing to.
    2837                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2838                 :             : 
    2839                 :             : */
    2840                 :             : 
    2841                 :             : static gimple *
    2842                 :    27400177 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2843                 :             :                                tree *type_out)
    2844                 :             : {
    2845                 :    27400177 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2846                 :    25130657 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2847                 :             :     return NULL;
    2848                 :             : 
    2849                 :         604 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2850                 :         604 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2851                 :         604 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2852                 :             : 
    2853                 :         604 :   tree bf_type = TREE_TYPE (value);
    2854                 :         604 :   tree container_type = TREE_TYPE (container);
    2855                 :             : 
    2856                 :         604 :   if (!INTEGRAL_TYPE_P (container_type)
    2857                 :         604 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2858                 :             :     return NULL;
    2859                 :             : 
    2860                 :         500 :   gimple *pattern_stmt;
    2861                 :             : 
    2862                 :         500 :   vect_unpromoted_value unprom;
    2863                 :         500 :   unprom.set_op (value, vect_internal_def);
    2864                 :         500 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2865                 :             :                               get_vectype_for_scalar_type (vinfo,
    2866                 :             :                                                            container_type));
    2867                 :             : 
    2868                 :         500 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
    2869                 :         500 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2870                 :         500 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2871                 :         500 :   if (BYTES_BIG_ENDIAN)
    2872                 :             :     {
    2873                 :             :       shift_n = prec - shift_n - mask_width;
    2874                 :             :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2875                 :             :     }
    2876                 :             : 
    2877                 :         500 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2878                 :             :     {
    2879                 :           0 :       pattern_stmt =
    2880                 :           0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2881                 :             :                              NOP_EXPR, value);
    2882                 :           0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2883                 :           0 :       value = gimple_get_lhs (pattern_stmt);
    2884                 :             :     }
    2885                 :             : 
    2886                 :             :   /* Shift VALUE into place.  */
    2887                 :         500 :   tree shifted = value;
    2888                 :         500 :   if (shift_n)
    2889                 :             :     {
    2890                 :         261 :       gimple_seq stmts = NULL;
    2891                 :         261 :       shifted
    2892                 :         261 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2893                 :         261 :       if (!gimple_seq_empty_p (stmts))
    2894                 :         112 :         append_pattern_def_seq (vinfo, stmt_info,
    2895                 :             :                                 gimple_seq_first_stmt (stmts));
    2896                 :             :     }
    2897                 :             : 
    2898                 :         500 :   tree mask_t
    2899                 :         500 :     = wide_int_to_tree (container_type,
    2900                 :         500 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2901                 :             : 
    2902                 :             :   /* Clear bits we don't want to write back from SHIFTED.  */
    2903                 :         500 :   gimple_seq stmts = NULL;
    2904                 :         500 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2905                 :             :                               mask_t);
    2906                 :         500 :   if (!gimple_seq_empty_p (stmts))
    2907                 :             :     {
    2908                 :         110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2909                 :         110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2910                 :             :     }
    2911                 :             : 
    2912                 :             :   /* Mask off the bits in the container that we are to write to.  */
    2913                 :         500 :   mask_t = wide_int_to_tree (container_type,
    2914                 :         500 :                              wi::shifted_mask (shift_n, mask_width, true, prec));
    2915                 :         500 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2916                 :         500 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2917                 :         500 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2918                 :             : 
    2919                 :             :   /* Write MASKED into CLEARED.  */
    2920                 :         500 :   pattern_stmt
    2921                 :         500 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2922                 :             :                            BIT_IOR_EXPR, cleared, masked);
    2923                 :             : 
    2924                 :         500 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2925                 :         500 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2926                 :             : 
    2927                 :         500 :   return pattern_stmt;
    2928                 :             : }
    2929                 :             : 
    2930                 :             : 
    2931                 :             : /* Recognize cases in which an operation is performed in one type WTYPE
    2932                 :             :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2933                 :             :    if we have:
    2934                 :             : 
    2935                 :             :      ATYPE a;  // narrower than NTYPE
    2936                 :             :      BTYPE b;  // narrower than NTYPE
    2937                 :             :      WTYPE aw = (WTYPE) a;
    2938                 :             :      WTYPE bw = (WTYPE) b;
    2939                 :             :      WTYPE res = aw + bw;  // only uses of aw and bw
    2940                 :             : 
    2941                 :             :    then it would be more efficient to do:
    2942                 :             : 
    2943                 :             :      NTYPE an = (NTYPE) a;
    2944                 :             :      NTYPE bn = (NTYPE) b;
    2945                 :             :      NTYPE resn = an + bn;
    2946                 :             :      WTYPE res = (WTYPE) resn;
    2947                 :             : 
    2948                 :             :    Other situations include things like:
    2949                 :             : 
    2950                 :             :      ATYPE a;  // NTYPE or narrower
    2951                 :             :      WTYPE aw = (WTYPE) a;
    2952                 :             :      WTYPE res = aw + b;
    2953                 :             : 
    2954                 :             :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2955                 :             :    to truncate "b" and do the operation on NTYPE instead:
    2956                 :             : 
    2957                 :             :      NTYPE an = (NTYPE) a;
    2958                 :             :      NTYPE bn = (NTYPE) b;  // truncation
    2959                 :             :      NTYPE resn = an + bn;
    2960                 :             :      WTYPE res = (WTYPE) resn;
    2961                 :             : 
    2962                 :             :    All users of "res" should then use "resn" instead, making the final
    2963                 :             :    statement dead (not marked as relevant).  The final statement is still
    2964                 :             :    needed to maintain the type correctness of the IR.
    2965                 :             : 
    2966                 :             :    vect_determine_precisions has already determined the minimum
    2967                 :             :    precision of the operation and the minimum precision required
    2968                 :             :    by users of the result.  */
    2969                 :             : 
    2970                 :             : static gimple *
    2971                 :    27400617 : vect_recog_over_widening_pattern (vec_info *vinfo,
    2972                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    2973                 :             : {
    2974                 :    27400617 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2975                 :    18695663 :   if (!last_stmt)
    2976                 :             :     return NULL;
    2977                 :             : 
    2978                 :             :   /* See whether we have found that this operation can be done on a
    2979                 :             :      narrower type without changing its semantics.  */
    2980                 :    18695663 :   unsigned int new_precision = last_stmt_info->operation_precision;
    2981                 :    18695663 :   if (!new_precision)
    2982                 :             :     return NULL;
    2983                 :             : 
    2984                 :     1183312 :   tree lhs = gimple_assign_lhs (last_stmt);
    2985                 :     1183312 :   tree type = TREE_TYPE (lhs);
    2986                 :     1183312 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2987                 :             : 
    2988                 :             :   /* Punt for reductions where we don't handle the type conversions.  */
    2989                 :     1183312 :   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    2990                 :             :     return NULL;
    2991                 :             : 
    2992                 :             :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    2993                 :             :      operands are interesting.  */
    2994                 :     1179900 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    2995                 :             : 
    2996                 :             :   /* Check the operands: each must be an INTEGER_CST or an SSA_NAME, otherwise we give up.  */
    2997                 :     1179900 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    2998                 :     1179900 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    2999                 :     1179900 :   unprom.quick_grow_cleared (nops);
    3000                 :     1179900 :   unsigned int min_precision = 0;
    3001                 :     1179900 :   bool single_use_p = false;
    3002                 :     3528361 :   for (unsigned int i = 0; i < nops; ++i)
    3003                 :             :     {
    3004                 :     2348890 :       tree op = gimple_op (last_stmt, first_op + i);
    3005                 :     2348890 :       if (TREE_CODE (op) == INTEGER_CST)
    3006                 :     1050673 :         unprom[i].set_op (op, vect_constant_def);
    3007                 :     1298217 :       else if (TREE_CODE (op) == SSA_NAME)
    3008                 :             :         {
    3009                 :     1298217 :           bool op_single_use_p = true;
    3010                 :     1298217 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3011                 :             :                                                      &op_single_use_p))
    3012                 :         429 :             return NULL;
    3013                 :             :           /* If:
    3014                 :             : 
    3015                 :             :              (1) N bits of the result are needed;
    3016                 :             :              (2) all inputs are widened from M<N bits; and
    3017                 :             :              (3) one operand OP is a single-use SSA name
    3018                 :             : 
    3019                 :             :              we can shift the M->N widening from OP to the output
    3020                 :             :              without changing the number or type of extensions involved.
    3021                 :             :              This then reduces the number of copies of STMT_INFO.
    3022                 :             : 
    3023                 :             :              If instead of (3) more than one operand is a single-use SSA name,
    3024                 :             :              shifting the extension to the output is even more of a win.
    3025                 :             : 
    3026                 :             :              If instead:
    3027                 :             : 
    3028                 :             :              (1) N bits of the result are needed;
    3029                 :             :              (2) one operand OP2 is widened from M2<N bits;
    3030                 :             :              (3) another operand OP1 is widened from M1<M2 bits; and
    3031                 :             :              (4) both OP1 and OP2 are single-use
    3032                 :             : 
    3033                 :             :              the choice is between:
    3034                 :             : 
    3035                 :             :              (a) truncating OP2 to M1, doing the operation on M1,
    3036                 :             :                  and then widening the result to N
    3037                 :             : 
    3038                 :             :              (b) widening OP1 to M2, doing the operation on M2, and then
    3039                 :             :                  widening the result to N
    3040                 :             : 
    3041                 :             :              Both shift the M2->N widening of the inputs to the output.
    3042                 :             :              (a) additionally shifts the M1->M2 widening to the output;
    3043                 :             :              it requires fewer copies of STMT_INFO but requires an extra
    3044                 :             :              M2->M1 truncation.
    3045                 :             : 
    3046                 :             :              Which is better will depend on the complexity and cost of
    3047                 :             :              STMT_INFO, which is hard to predict at this stage.  However,
    3048                 :             :              a clear tie-breaker in favor of (b) is the fact that the
    3049                 :             :              truncation in (a) increases the length of the operation chain.
    3050                 :             : 
    3051                 :             :              If instead of (4) only one of OP1 or OP2 is single-use,
    3052                 :             :              (b) is still a win over doing the operation in N bits:
    3053                 :             :              it still shifts the M2->N widening on the single-use operand
    3054                 :             :              to the output and reduces the number of STMT_INFO copies.
    3055                 :             : 
    3056                 :             :              If neither operand is single-use then operating on fewer than
    3057                 :             :              N bits might lead to more extensions overall.  Whether it does
    3058                 :             :              or not depends on global information about the vectorization
    3059                 :             :              region, and whether that's a good trade-off would again
    3060                 :             :              depend on the complexity and cost of the statements involved,
    3061                 :             :              as well as things like register pressure that are not normally
    3062                 :             :              modelled at this stage.  We therefore ignore these cases
    3063                 :             :              and just optimize the clear single-use wins above.
    3064                 :             : 
    3065                 :             :              Thus we take the maximum precision of the unpromoted operands
    3066                 :             :              and record whether any operand is single-use.  */
    3067                 :     1297788 :           if (unprom[i].dt == vect_internal_def)
    3068                 :             :             {
    3069                 :      932211 :               min_precision = MAX (min_precision,
    3070                 :             :                                    TYPE_PRECISION (unprom[i].type));
    3071                 :      932211 :               single_use_p |= op_single_use_p;
    3072                 :             :             }
    3073                 :             :         }
    3074                 :             :       else
    3075                 :             :         return NULL;
    3076                 :             :     }
    3077                 :             : 
    3078                 :             :   /* Although the operation could be done in operation_precision, we have
    3079                 :             :      to balance that against introducing extra truncations or extensions.
    3080                 :             :      Calculate the minimum precision that can be handled efficiently.
    3081                 :             : 
    3082                 :             :      The loop above determined that the operation could be handled
    3083                 :             :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3084                 :             :      extension from the inputs to the output without introducing more
    3085                 :             :      instructions, and would reduce the number of instructions required
    3086                 :             :      for STMT_INFO itself.
    3087                 :             : 
    3088                 :             :      vect_determine_precisions has also determined that the result only
    3089                 :             :      needs min_output_precision bits.  Truncating by a factor of N times
    3090                 :             :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3091                 :             :      than min_output_precision, doing the operation in TYPE and truncating
    3092                 :             :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3093                 :             :      In contrast:
    3094                 :             : 
    3095                 :             :      - truncating the input to a unary operation and doing the operation
    3096                 :             :        in the new type requires at most N - 1 + 1 = N instructions per
    3097                 :             :        output vector
    3098                 :             : 
    3099                 :             :      - doing the same for a binary operation requires at most
    3100                 :             :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3101                 :             : 
    3102                 :             :      Both unary and binary operations require fewer instructions than
    3103                 :             :      this if the operands were extended from a suitable truncated form.
    3104                 :             :      Thus there is usually nothing to lose by doing operations in
    3105                 :             :      min_output_precision bits, but there can be something to gain.  */
    3106                 :     1179471 :   if (!single_use_p)
    3107                 :      869929 :     min_precision = last_stmt_info->min_output_precision;
    3108                 :             :   else
    3109                 :      309542 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3110                 :             : 
    3111                 :             :   /* Apply the minimum efficient precision we just calculated; punt unless the result is narrower than TYPE.  */
    3112                 :     1179471 :   if (new_precision < min_precision)
    3113                 :             :     new_precision = min_precision;
    3114                 :     1179471 :   new_precision = vect_element_precision (new_precision);
    3115                 :     1179471 :   if (new_precision >= TYPE_PRECISION (type))
    3116                 :             :     return NULL;
    3117                 :             :   /* vect_pattern_detected is called before the vector-type checks below, which can still return NULL.  */
    3118                 :      130585 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3119                 :             : 
    3120                 :      130585 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3121                 :      130585 :   if (!*type_out)
    3122                 :             :     return NULL;
    3123                 :             : 
    3124                 :             :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3125                 :      116326 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3126                 :      116326 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3127                 :             : 
    3128                 :             :   /* If we're truncating an operation, we need to make sure that we
    3129                 :             :      don't introduce new undefined overflow.  The codes tested here are
    3130                 :             :      a subset of those accepted by vect_truncatable_operation_p.  */
    3131                 :      116326 :   tree op_type = new_type;
    3132                 :      116326 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3133                 :      148330 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3134                 :       24394 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3135                 :             : 
    3136                 :             :   /* We specifically don't check here whether the target supports the
    3137                 :             :      new operation, since it might be something that a later pattern
    3138                 :             :      wants to rewrite anyway.  If targets have a minimum element size
    3139                 :             :      for some optabs, we should pattern-match smaller ops to larger ops
    3140                 :             :      where beneficial.  */
    3141                 :      116326 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3142                 :      116326 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3143                 :      116326 :   if (!new_vectype || !op_vectype)
    3144                 :             :     return NULL;
    3145                 :             : 
    3146                 :      116326 :   if (dump_enabled_p ())
    3147                 :        4035 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3148                 :             :                      type, new_type);
    3149                 :             : 
    3150                 :             :   /* Calculate the rhs operands for an operation on OP_TYPE; operands before FIRST_OP are copied unchanged.  */
    3151                 :      116326 :   tree ops[3] = {};
    3152                 :      116535 :   for (unsigned int i = 1; i < first_op; ++i)
    3153                 :         209 :     ops[i - 1] = gimple_op (last_stmt, i);
    3154                 :      116326 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3155                 :      116326 :                        op_type, &unprom[0], op_vectype);
    3156                 :             : 
    3157                 :             :   /* Use the operation to produce a result of type OP_TYPE.  */
    3158                 :      116326 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3159                 :      116326 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3160                 :             :                                               ops[0], ops[1], ops[2]);
    3161                 :      116326 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3162                 :             : 
    3163                 :      116326 :   if (dump_enabled_p ())
    3164                 :        4035 :     dump_printf_loc (MSG_NOTE, vect_location,
    3165                 :             :                      "created pattern stmt: %G", pattern_stmt);
    3166                 :             : 
    3167                 :             :   /* Convert back to the original signedness, if OP_TYPE is different
    3168                 :             :      from NEW_TYPE.  */
    3169                 :      116326 :   if (op_type != new_type)
    3170                 :       24394 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3171                 :             :                                         pattern_stmt, op_vectype);
    3172                 :             : 
    3173                 :             :   /* Promote the result to the original type.  */
    3174                 :      116326 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3175                 :             :                                       pattern_stmt, new_vectype);
    3176                 :             : 
    3177                 :      116326 :   return pattern_stmt;
    3178                 :     1179900 : }
    3179                 :             : 
    3180                 :             : /* Recognize the following patterns:
    3181                 :             : 
    3182                 :             :      ATYPE a;  // narrower than TYPE
    3183                 :             :      BTYPE b;  // narrower than TYPE
    3184                 :             : 
    3185                 :             :    1) Multiply high with scaling
    3186                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3187                 :             :      Here, c is bitsize (TYPE) / 2 - 1.
    3188                 :             : 
    3189                 :             :    2) ... or also with rounding
    3190                 :             :      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
    3191                 :             :      Here, d is bitsize (TYPE) / 2 - 2.
    3192                 :             : 
    3193                 :             :    3) Normal multiply high
    3194                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3195                 :             :      Here, e is bitsize (TYPE) / 2.
    3196                 :             : 
    3197                 :             :    where only the bottom half of res is used.  */
    3198                 :             : 
    3199                 :             : static gimple *
    3200                 :    27504606 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3201                 :             :                           stmt_vec_info last_stmt_info, tree *type_out)
    3202                 :             : {
    3203                 :             :   /* Check for a right shift.  */
    3204                 :    27504606 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3205                 :    18799530 :   if (!last_stmt
    3206                 :    18799530 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3207                 :             :     return NULL;
    3208                 :             : 
    3209                 :             :   /* Check that the shift result is wider than the users of the
    3210                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3211                 :      302396 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3212                 :      302396 :   unsigned int target_precision
    3213                 :      302396 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3214                 :      302396 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3215                 :      302396 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3216                 :             :     return NULL;
    3217                 :             : 
    3218                 :             :   /* Look through any change in sign on the outer shift input.  */
    3219                 :       40094 :   vect_unpromoted_value unprom_rshift_input;
    3220                 :       40094 :   tree rshift_input = vect_look_through_possible_promotion
    3221                 :       40094 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3222                 :       40094 :   if (!rshift_input
    3223                 :       40094 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3224                 :       39473 :            != TYPE_PRECISION (lhs_type))
    3225                 :             :     return NULL;
    3226                 :             : 
    3227                 :             :   /* Get the definition of the shift input.  */
    3228                 :       37547 :   stmt_vec_info rshift_input_stmt_info
    3229                 :       37547 :     = vect_get_internal_def (vinfo, rshift_input);
    3230                 :       37547 :   if (!rshift_input_stmt_info)
    3231                 :             :     return NULL;
    3232                 :       33499 :   gassign *rshift_input_stmt
    3233                 :    27533917 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3234                 :       29380 :   if (!rshift_input_stmt)
    3235                 :             :     return NULL;
    3236                 :             : 
    3237                 :       29380 :   stmt_vec_info mulh_stmt_info;
    3238                 :       29380 :   tree scale_term;
    3239                 :       29380 :   bool rounding_p = false;
    3240                 :             : 
    3241                 :             :   /* Check for the presence of the rounding term.  */
    3242                 :       35502 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3243                 :             :     {
    3244                 :             :       /* Check that the outer shift was by 1.  */
    3245                 :       16554 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3246                 :        8229 :         return NULL;
    3247                 :             : 
    3248                 :             :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3249                 :        1326 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3250                 :             :         return NULL;
    3251                 :             : 
    3252                 :             :       /* Look through any change in sign on the addition input.  */
    3253                 :          91 :       vect_unpromoted_value unprom_plus_input;
    3254                 :          91 :       tree plus_input = vect_look_through_possible_promotion
    3255                 :          91 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3256                 :          91 :       if (!plus_input
    3257                 :          91 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3258                 :          91 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3259                 :             :         return NULL;
    3260                 :             : 
    3261                 :             :       /* Get the definition of the multiply-high-scale part.  */
    3262                 :          91 :       stmt_vec_info plus_input_stmt_info
    3263                 :          91 :         = vect_get_internal_def (vinfo, plus_input);
    3264                 :          91 :       if (!plus_input_stmt_info)
    3265                 :             :         return NULL;
    3266                 :          91 :       gassign *plus_input_stmt
    3267                 :        8320 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3268                 :          91 :       if (!plus_input_stmt
    3269                 :          91 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3270                 :             :         return NULL;
    3271                 :             : 
    3272                 :             :       /* Look through any change in sign on the scaling input.  */
    3273                 :          48 :       vect_unpromoted_value unprom_scale_input;
    3274                 :          48 :       tree scale_input = vect_look_through_possible_promotion
    3275                 :          48 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3276                 :          48 :       if (!scale_input
    3277                 :          48 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3278                 :          48 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3279                 :             :         return NULL;
    3280                 :             : 
    3281                 :             :       /* Get the definition of the multiply-high part.  */
    3282                 :          48 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3283                 :          48 :       if (!mulh_stmt_info)
    3284                 :             :         return NULL;
    3285                 :             : 
    3286                 :             :       /* Get the scaling term.  */
    3287                 :          48 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3288                 :          48 :       rounding_p = true;
    3289                 :             :     }
    3290                 :             :   else
    3291                 :             :     {
    3292                 :       21103 :       mulh_stmt_info = rshift_input_stmt_info;
    3293                 :       21103 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3294                 :             :     }
    3295                 :             : 
    3296                 :             :   /* Check that the scaling factor is constant.  */
    3297                 :       21151 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3298                 :             :     return NULL;
    3299                 :             : 
    3300                 :             :   /* Check whether the scaling input term can be seen as two widened
    3301                 :             :      inputs multiplied together.  */
    3302                 :       60615 :   vect_unpromoted_value unprom_mult[2];
    3303                 :       20205 :   tree new_type;
    3304                 :       20205 :   unsigned int nops
    3305                 :       20205 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3306                 :             :                             false, 2, unprom_mult, &new_type);
    3307                 :       20205 :   if (nops != 2)
    3308                 :             :     return NULL;
    3309                 :             : 
    3310                 :             :   /* Adjust output precision.  */
    3311                 :        2479 :   if (TYPE_PRECISION (new_type) < target_precision)
    3312                 :           0 :     new_type = build_nonstandard_integer_type
    3313                 :           0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3314                 :             : 
    3315                 :        2479 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3316                 :        2479 :   internal_fn ifn;
    3317                 :             :   /* Check that the scaling factor has the expected value.  Compare
    3318                 :             :      against MULT_PRECISION, the precision actually used for the
    3319                 :             :      internal function, rather than TARGET_PRECISION.  */
    3320                 :        2479 :   if (rounding_p)
    3321                 :             :     {
    3322                 :             :       /* Check pattern 2).  */
    3323                 :          96 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3324                 :         144 :           != TYPE_PRECISION (lhs_type))
    3325                 :             :         return NULL;
    3326                 :             : 
    3327                 :             :       ifn = IFN_MULHRS;
    3328                 :             :     }
    3329                 :             :   else
    3330                 :             :     {
    3331                 :             :       /* Check for pattern 1).  */
    3332                 :        4862 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3333                 :        7293 :           == TYPE_PRECISION (lhs_type))
    3334                 :             :         ifn = IFN_MULHS;
    3335                 :             :       /* Check for pattern 3).  */
    3336                 :        2387 :       else if (wi::to_widest (scale_term) + mult_precision
    3337                 :        4774 :                == TYPE_PRECISION (lhs_type))
    3338                 :             :         ifn = IFN_MULH;
    3339                 :             :       else
    3340                 :             :         return NULL;
    3341                 :             :     }
    3342                 :             : 
    3343                 :        2407 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3344                 :             : 
    3345                 :             :   /* Check for target support.  */
    3346                 :        2407 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3347                 :        2407 :   if (!new_vectype
    3348                 :        4800 :       || !direct_internal_fn_supported_p
    3349                 :        2393 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3350                 :        2338 :     return NULL;
    3351                 :             : 
    3352                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3353                 :             :      it's likely to be discarded.  */
    3354                 :          69 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3355                 :          69 :   if (!*type_out)
    3356                 :             :     return NULL;
    3357                 :             : 
    3358                 :             :   /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call selected above.  */
    3359                 :          69 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3360                 :          69 :   tree new_ops[2];
    3361                 :          69 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3362                 :             :                        unprom_mult, new_vectype);
    3363                 :          69 :   gcall *mulhrs_stmt
    3364                 :          69 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3365                 :          69 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3366                 :          69 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3367                 :             : 
    3368                 :          69 :   if (dump_enabled_p ())
    3369                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3370                 :             :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3371                 :             : 
    3372                 :          69 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3373                 :          69 :                               mulhrs_stmt, new_vectype);
    3374                 :             : }
    3375                 :             : 
    3376                 :             : /* Recognize the patterns:
    3377                 :             : 
    3378                 :             :             ATYPE a;  // narrower than TYPE
    3379                 :             :             BTYPE b;  // narrower than TYPE
    3380                 :             :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3381                 :             :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3382                 :             : 
    3383                 :             :    where only the bottom half of avg is used.  Try to transform them into:
    3384                 :             : 
    3385                 :             :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3386                 :             :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3387                 :             : 
    3388                 :             :   followed by:
    3389                 :             : 
    3390                 :             :             TYPE avg = (TYPE) avg';
    3391                 :             : 
    3392                 :             :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3393                 :             :   of avg is used, all or part of the cast of avg' should become redundant.
    3394                 :             : 
    3395                 :             :   If there is no target support available, generate code to distribute rshift
    3396                 :             :   over plus and add a carry.  */
    3397                 :             : 
    3398                 :             : static gimple *
    3399                 :    27502875 : vect_recog_average_pattern (vec_info *vinfo,
    3400                 :             :                             stmt_vec_info last_stmt_info, tree *type_out)
    3401                 :             : {
    3402                 :             :   /* Check for a shift right by one bit.  */
    3403                 :    27502875 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3404                 :    18797921 :   if (!last_stmt
    3405                 :    18797921 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3406                 :      302252 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3407                 :    27458032 :     return NULL;
    3408                 :             : 
    3409                 :             :   /* Check that the shift result is wider than the users of the
    3410                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3411                 :       44843 :   tree lhs = gimple_assign_lhs (last_stmt);
    3412                 :       44843 :   tree type = TREE_TYPE (lhs);
    3413                 :       44843 :   unsigned int target_precision
    3414                 :       44843 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3415                 :       44843 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3416                 :             :     return NULL;
    3417                 :             : 
    3418                 :             :   /* Look through any change in sign on the shift input.  */
    3419                 :        2303 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3420                 :        2303 :   vect_unpromoted_value unprom_plus;
    3421                 :        2303 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3422                 :             :                                                      &unprom_plus);
    3423                 :        2303 :   if (!rshift_rhs
    3424                 :        2303 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3425                 :             :     return NULL;
    3426                 :             : 
    3427                 :             :   /* Get the definition of the shift input.  */
    3428                 :        2293 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3429                 :        2293 :   if (!plus_stmt_info)
    3430                 :             :     return NULL;
    3431                 :             : 
    3432                 :             :   /* Check whether the shift input can be seen as a tree of additions on
    3433                 :             :      2 or 3 widened inputs.
    3434                 :             : 
    3435                 :             :      Note that the pattern should be a win even if the result of one or
    3436                 :             :      more additions is reused elsewhere: if the pattern matches, we'd be
    3437                 :             :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3438                 :        9100 :   internal_fn ifn = IFN_AVG_FLOOR;
    3439                 :        9100 :   vect_unpromoted_value unprom[3];
    3440                 :        2275 :   tree new_type;
    3441                 :        2275 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3442                 :             :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3443                 :             :                                             unprom, &new_type);
    3444                 :        2275 :   if (nops == 0)
    3445                 :             :     return NULL;
    3446                 :         912 :   if (nops == 3)
    3447                 :             :     {
    3448                 :             :       /* Check that one operand is 1.  */
    3449                 :             :       unsigned int i;
    3450                 :         921 :       for (i = 0; i < 3; ++i)
    3451                 :         861 :         if (integer_onep (unprom[i].op))
    3452                 :             :           break;
    3453                 :         287 :       if (i == 3)
    3454                 :             :         return NULL;
    3455                 :             :       /* Throw away the 1 operand and keep the other two.  */
    3456                 :         227 :       if (i < 2)
    3457                 :           0 :         unprom[i] = unprom[2];
    3458                 :             :       ifn = IFN_AVG_CEIL;
    3459                 :             :     }
    3460                 :             : 
    3461                 :         852 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3462                 :             : 
    3463                 :             :   /* We know that:
    3464                 :             : 
    3465                 :             :      (a) the operation can be viewed as:
    3466                 :             : 
    3467                 :             :            TYPE widened0 = (TYPE) UNPROM[0];
    3468                 :             :            TYPE widened1 = (TYPE) UNPROM[1];
    3469                 :             :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3470                 :             :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3471                 :             : 
    3472                 :             :      (b) the first two statements are equivalent to:
    3473                 :             : 
    3474                 :             :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3475                 :             :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3476                 :             : 
    3477                 :             :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3478                 :             :          where sensible;
    3479                 :             : 
    3480                 :             :      (d) all the operations can be performed correctly at twice the width of
    3481                 :             :          NEW_TYPE, due to the nature of the average operation; and
    3482                 :             : 
    3483                 :             :      (e) users of the result of the right shift need only TARGET_PRECISION
    3484                 :             :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3485                 :             :          precision.
    3486                 :             : 
    3487                 :             :      Under these circumstances, the only situation in which NEW_TYPE
    3488                 :             :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3489                 :             :      and an addition result are all used more than once.  Thus we can
    3490                 :             :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3491                 :             :      as "free", whereas widening the result of the average instruction
    3492                 :             :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3493                 :             :      therefore better not to go narrower than TARGET_PRECISION.  */
    3494                 :         852 :   if (TYPE_PRECISION (new_type) < target_precision)
    3495                 :           8 :     new_type = build_nonstandard_integer_type (target_precision,
    3496                 :           8 :                                                TYPE_UNSIGNED (new_type));
    3497                 :             : 
    3498                 :             :   /* Check for target support.  */
    3499                 :         852 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3500                 :         852 :   if (!new_vectype)
    3501                 :             :     return NULL;
    3502                 :             : 
    3503                 :         852 :   bool fallback_p = false;
    3504                 :             : 
    3505                 :         852 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3506                 :             :     ;
    3507                 :         730 :   else if (TYPE_UNSIGNED (new_type)
    3508                 :         267 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3509                 :         267 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3510                 :         267 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3511                 :         997 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3512                 :             :     fallback_p = true;
    3513                 :             :   else
    3514                 :         463 :     return NULL;
    3515                 :             : 
    3516                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3517                 :             :      it's likely to be discarded.  */
    3518                 :         389 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3519                 :         389 :   if (!*type_out)
    3520                 :             :     return NULL;
    3521                 :             : 
    3522                 :         388 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3523                 :         388 :   tree new_ops[2];
    3524                 :         388 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3525                 :             :                        unprom, new_vectype);
    3526                 :             : 
    3527                 :         388 :   if (fallback_p)
    3528                 :             :     {
    3529                 :             :       /* As a fallback, generate code for following sequence:
    3530                 :             : 
    3531                 :             :          shifted_op0 = new_ops[0] >> 1;
    3532                 :             :          shifted_op1 = new_ops[1] >> 1;
    3533                 :             :          sum_of_shifted = shifted_op0 + shifted_op1;
    3534                 :             :          unmasked_carry = new_ops[0] and/or new_ops[1];
    3535                 :             :          carry = unmasked_carry & 1;
    3536                 :             :          new_var = sum_of_shifted + carry;
    3537                 :             :       */
    3538                 :             : 
    3539                 :         266 :       tree one_cst = build_one_cst (new_type);
    3540                 :         266 :       gassign *g;
    3541                 :             : 
    3542                 :         266 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3543                 :         266 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3544                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3545                 :             : 
    3546                 :         266 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3547                 :         266 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3548                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3549                 :             : 
    3550                 :         266 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3551                 :         266 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3552                 :             :                                shifted_op0, shifted_op1);
    3553                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3554                 :             : 
    3555                 :         266 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3556                 :         266 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3557                 :         266 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3558                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3559                 :             : 
    3560                 :         266 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3561                 :         266 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3562                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3563                 :             : 
    3564                 :         266 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3565                 :         266 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3566                 :             :     }
    3567                 :             : 
    3568                 :             :   /* Generate the IFN_AVG* call.  */
    3569                 :         122 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3570                 :             :                                                     new_ops[1]);
    3571                 :         122 :   gimple_call_set_lhs (average_stmt, new_var);
    3572                 :         122 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3573                 :             : 
    3574                 :         122 :   if (dump_enabled_p ())
    3575                 :          42 :     dump_printf_loc (MSG_NOTE, vect_location,
    3576                 :             :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3577                 :             : 
    3578                 :         122 :   return vect_convert_output (vinfo, last_stmt_info,
    3579                 :         122 :                               type, average_stmt, new_vectype);
    3580                 :             : }
    3581                 :             : 
    3582                 :             : /* Recognize cases in which the input to a cast is wider than its
    3583                 :             :    output, and the input is fed by a widening operation.  Fold this
    3584                 :             :    by removing the unnecessary intermediate widening.  E.g.:
    3585                 :             : 
    3586                 :             :      unsigned char a;
    3587                 :             :      unsigned int b = (unsigned int) a;
    3588                 :             :      unsigned short c = (unsigned short) b;
    3589                 :             : 
    3590                 :             :    -->
    3591                 :             : 
    3592                 :             :      unsigned short c = (unsigned short) a;
    3593                 :             : 
    3594                 :             :    Although this is rare in input IR, it is an expected side-effect
    3595                 :             :    of the over-widening pattern above.
    3596                 :             : 
    3597                 :             :    This is beneficial also for integer-to-float conversions, if the
    3598                 :             :    widened integer has more bits than the float, and if the unwidened
    3599                 :             :    input doesn't.  */
    3600                 :             : 
    3601                 :             : static gimple *
    3602                 :    27504606 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3603                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3604                 :             : {
    3605                 :             :   /* Check for a cast, including an integer-to-float conversion.  */
    3606                 :    46265221 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3607                 :    18799461 :   if (!last_stmt)
    3608                 :             :     return NULL;
    3609                 :    18799461 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3610                 :    18799461 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3611                 :             :     return NULL;
    3612                 :             : 
    3613                 :             :   /* Make sure that the lhs is a scalar with a natural bitsize.  */
    3614                 :     2652566 :   tree lhs = gimple_assign_lhs (last_stmt);
    3615                 :     2652566 :   if (!lhs)
    3616                 :             :     return NULL;
    3617                 :     2652566 :   tree lhs_type = TREE_TYPE (lhs);
    3618                 :     2652566 :   scalar_mode lhs_mode;
    3619                 :     2635401 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3620                 :     5286274 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3621                 :       22070 :     return NULL;
    3622                 :             : 
    3623                 :             :   /* Check for a narrowing operation (from a vector point of view).  */
    3624                 :     2630496 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3625                 :     2630496 :   tree rhs_type = TREE_TYPE (rhs);
    3626                 :     2630496 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3627                 :     2361968 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3628                 :     7183504 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3629                 :             :     return NULL;
    3630                 :             : 
    3631                 :             :   /* Try to find an unpromoted input.  */
    3632                 :      320280 :   vect_unpromoted_value unprom;
    3633                 :      320280 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3634                 :      320280 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3635                 :             :     return NULL;
    3636                 :             : 
    3637                 :             :   /* If the bits above RHS_TYPE matter, make sure that they're the
    3638                 :             :      same when extending from UNPROM as they are when extending from RHS.  */
    3639                 :       38968 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3640                 :       38968 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3641                 :             :     return NULL;
    3642                 :             : 
    3643                 :             :   /* We can get the same result by casting UNPROM directly, to avoid
    3644                 :             :      the unnecessary widening and narrowing.  */
    3645                 :       38848 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3646                 :             : 
    3647                 :       38848 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3648                 :       38848 :   if (!*type_out)
    3649                 :             :     return NULL;
    3650                 :             : 
    3651                 :       38846 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3652                 :       38846 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3653                 :       38846 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3654                 :             : 
    3655                 :       38846 :   return pattern_stmt;
    3656                 :             : }
    3657                 :             : 
    3658                 :             : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3659                 :             :    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    3660                 :             : 
    3661                 :             : static gimple *
    3662                 :    27444030 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3663                 :             :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3664                 :             : {
    3665                 :    27444030 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3666                 :             :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3667                 :    27444030 :                                       "vect_recog_widen_shift_pattern");
    3668                 :             : }
    3669                 :             : 
    3670                 :             : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
    3671                 :             : 
    3672                 :             :    type a_t, b_t, c_t;
    3673                 :             : 
    3674                 :             :    S0 a_t = b_t r<< c_t;
    3675                 :             : 
    3676                 :             :   Input/Output:
    3677                 :             : 
    3678                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    3679                 :             :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3680                 :             :     with a sequence:
    3681                 :             : 
    3682                 :             :    S1 d_t = -c_t;
    3683                 :             :    S2 e_t = d_t & (B - 1);
    3684                 :             :    S3 f_t = b_t << c_t;
    3685                 :             :    S4 g_t = b_t >> e_t;
    3686                 :             :    S0 a_t = f_t | g_t;
    3687                 :             : 
    3688                 :             :     where B is element bitsize of type.
    3689                 :             : 
    3690                 :             :   Output:
    3691                 :             : 
    3692                 :             :   * TYPE_OUT: The type of the output of this pattern.
    3693                 :             : 
    3694                 :             :   * Return value: A new stmt that will be used to replace the rotate
    3695                 :             :     S0 stmt.  */
    3696                 :             : 
    3697                 :             : static gimple *
    3698                 :    27444030 : vect_recog_rotate_pattern (vec_info *vinfo,
    3699                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3700                 :             : {
    3701                 :    27444030 :   gimple *last_stmt = stmt_vinfo->stmt;
    3702                 :    27444030 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3703                 :    27444030 :   gimple *pattern_stmt, *def_stmt;
    3704                 :    27444030 :   enum tree_code rhs_code;
    3705                 :    27444030 :   enum vect_def_type dt;
    3706                 :    27444030 :   optab optab1, optab2;
    3707                 :    27444030 :   edge ext_def = NULL;
    3708                 :    27444030 :   bool bswap16_p = false;
    3709                 :             : 
    3710                 :    27444030 :   if (is_gimple_assign (last_stmt))
    3711                 :             :     {
    3712                 :    18738859 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3713                 :    18738859 :       switch (rhs_code)
    3714                 :             :         {
    3715                 :        4303 :         case LROTATE_EXPR:
    3716                 :        4303 :         case RROTATE_EXPR:
    3717                 :        4303 :           break;
    3718                 :             :         default:
    3719                 :             :           return NULL;
    3720                 :             :         }
    3721                 :             : 
    3722                 :        4303 :       lhs = gimple_assign_lhs (last_stmt);
    3723                 :        4303 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3724                 :        4303 :       type = TREE_TYPE (oprnd0);
    3725                 :        4303 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3726                 :             :     }
    3727                 :     8705171 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3728                 :             :     {
    3729                 :             :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3730                 :             :          The vectorizer has bswap support, but only if the argument isn't
    3731                 :             :          promoted.  */
    3732                 :         138 :       lhs = gimple_call_lhs (last_stmt);
    3733                 :         138 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3734                 :         138 :       type = TREE_TYPE (oprnd0);
    3735                 :         138 :       if (!lhs
    3736                 :         138 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3737                 :         138 :           || TYPE_PRECISION (type) <= 16
    3738                 :         126 :           || TREE_CODE (oprnd0) != SSA_NAME
    3739                 :         264 :           || BITS_PER_UNIT != 8)
    3740                 :          96 :         return NULL;
    3741                 :             : 
    3742                 :         126 :       stmt_vec_info def_stmt_info;
    3743                 :         126 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3744                 :             :         return NULL;
    3745                 :             : 
    3746                 :         126 :       if (dt != vect_internal_def)
    3747                 :             :         return NULL;
    3748                 :             : 
    3749                 :         122 :       if (gimple_assign_cast_p (def_stmt))
    3750                 :             :         {
    3751                 :          84 :           def = gimple_assign_rhs1 (def_stmt);
    3752                 :         168 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3753                 :         168 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3754                 :             :             oprnd0 = def;
    3755                 :             :         }
    3756                 :             : 
    3757                 :         122 :       type = TREE_TYPE (lhs);
    3758                 :         122 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3759                 :         122 :       if (vectype == NULL_TREE)
    3760                 :             :         return NULL;
    3761                 :             : 
    3762                 :         122 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3763                 :             :         {
    3764                 :             :           /* The encoding uses one stepped pattern for each byte in the
    3765                 :             :              16-bit word.  */
    3766                 :         122 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3767                 :         488 :           for (unsigned i = 0; i < 3; ++i)
    3768                 :        1098 :             for (unsigned j = 0; j < 2; ++j)
    3769                 :         732 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3770                 :             : 
    3771                 :         122 :           vec_perm_indices indices (elts, 1,
    3772                 :         122 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3773                 :         122 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3774                 :         122 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3775                 :             :             {
    3776                 :             :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3777                 :             :                  undo the argument promotion.  */
    3778                 :          80 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3779                 :             :                 {
    3780                 :          29 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3781                 :          29 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3782                 :          29 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3783                 :          29 :                   oprnd0 = def;
    3784                 :             :                 }
    3785                 :             : 
    3786                 :             :               /* Pattern detected.  */
    3787                 :          80 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3788                 :             : 
    3789                 :          80 :               *type_out = vectype;
    3790                 :             : 
    3791                 :             :               /* Pattern supported.  Create a stmt to be used to replace the
    3792                 :             :                  pattern, with the unpromoted argument.  */
    3793                 :          80 :               var = vect_recog_temp_ssa_var (type, NULL);
    3794                 :          80 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3795                 :             :                                                 1, oprnd0);
    3796                 :          80 :               gimple_call_set_lhs (pattern_stmt, var);
    3797                 :          80 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3798                 :             :                                       gimple_call_fntype (last_stmt));
    3799                 :          80 :               return pattern_stmt;
    3800                 :             :             }
    3801                 :         122 :         }
    3802                 :             : 
    3803                 :          42 :       oprnd1 = build_int_cst (integer_type_node, 8);
    3804                 :          42 :       rhs_code = LROTATE_EXPR;
    3805                 :          42 :       bswap16_p = true;
    3806                 :             :     }
    3807                 :             :   else
    3808                 :             :     return NULL;
    3809                 :             : 
    3810                 :        4345 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3811                 :        4225 :       || !INTEGRAL_TYPE_P (type)
    3812                 :        8561 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3813                 :             :     return NULL;
    3814                 :             : 
    3815                 :        4216 :   stmt_vec_info def_stmt_info;
    3816                 :        4216 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3817                 :             :     return NULL;
    3818                 :             : 
    3819                 :        4216 :   if (dt != vect_internal_def
    3820                 :        4007 :       && dt != vect_constant_def
    3821                 :          21 :       && dt != vect_external_def)
    3822                 :             :     return NULL;
    3823                 :             : 
    3824                 :        4210 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3825                 :        4210 :   if (vectype == NULL_TREE)
    3826                 :             :     return NULL;
    3827                 :             : 
    3828                 :             :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3829                 :             :      don't do anything here.  */
    3830                 :        3991 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3831                 :        3991 :   if (optab1
    3832                 :        3991 :       && can_implement_p (optab1, TYPE_MODE (vectype)))
    3833                 :             :     {
    3834                 :          34 :      use_rotate:
    3835                 :          34 :       if (bswap16_p)
    3836                 :             :         {
    3837                 :           0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3838                 :             :             {
    3839                 :           0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3840                 :           0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3841                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3842                 :           0 :               oprnd0 = def;
    3843                 :             :             }
    3844                 :             : 
    3845                 :             :           /* Pattern detected.  */
    3846                 :           0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3847                 :             : 
    3848                 :           0 :           *type_out = vectype;
    3849                 :             : 
    3850                 :             :           /* Pattern supported.  Create a stmt to be used to replace the
    3851                 :             :              pattern.  */
    3852                 :           0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3853                 :           0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3854                 :             :                                               oprnd1);
    3855                 :           0 :           return pattern_stmt;
    3856                 :             :         }
    3857                 :             :       return NULL;
    3858                 :             :     }
    3859                 :             : 
    3860                 :        3957 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3861                 :             :     {
    3862                 :        3896 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3863                 :        3896 :       if (optab2
    3864                 :        3896 :           && can_implement_p (optab2, TYPE_MODE (vectype)))
    3865                 :           0 :         goto use_rotate;
    3866                 :             :     }
    3867                 :             : 
    3868                 :        3957 :   tree utype = unsigned_type_for (type);
    3869                 :        3957 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3870                 :        3957 :   if (!uvectype)
    3871                 :             :     return NULL;
    3872                 :             : 
    3873                 :             :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3874                 :             :      don't do anything here either.  */
    3875                 :        3957 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3876                 :        3957 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3877                 :        3957 :   if (!optab1
    3878                 :        3957 :       || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3879                 :         331 :       || !optab2
    3880                 :        4288 :       || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3881                 :             :     {
    3882                 :        3626 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3883                 :             :         return NULL;
    3884                 :        3583 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3885                 :        3583 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3886                 :        3583 :       if (!optab1
    3887                 :        3583 :           || !can_implement_p (optab1, TYPE_MODE (uvectype))
    3888                 :        2952 :           || !optab2
    3889                 :        6535 :           || !can_implement_p (optab2, TYPE_MODE (uvectype)))
    3890                 :         631 :         return NULL;
    3891                 :             :     }
    3892                 :             : 
    3893                 :        3283 :   *type_out = vectype;
    3894                 :             : 
    3895                 :        3283 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
    3896                 :             :     {
    3897                 :          63 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3898                 :          63 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3899                 :          63 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3900                 :          63 :       oprnd0 = def;
    3901                 :             :     }
    3902                 :             : 
    3903                 :        3283 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3904                 :          13 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3905                 :             : 
    3906                 :        3283 :   def = NULL_TREE;
    3907                 :        3283 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3908                 :        3283 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3909                 :             :     def = oprnd1;
    3910                 :          28 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3911                 :             :     {
    3912                 :           0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3913                 :           0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3914                 :           0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3915                 :           0 :              == TYPE_PRECISION (type))
    3916                 :             :         def = rhs1;
    3917                 :             :     }
    3918                 :             : 
    3919                 :        3255 :   if (def == NULL_TREE)
    3920                 :             :     {
    3921                 :          28 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3922                 :          28 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3923                 :          28 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3924                 :             :     }
    3925                 :        3283 :   stype = TREE_TYPE (def);
    3926                 :             : 
    3927                 :        3283 :   if (TREE_CODE (def) == INTEGER_CST)
    3928                 :             :     {
    3929                 :        3172 :       if (!tree_fits_uhwi_p (def)
    3930                 :        3172 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3931                 :        6344 :           || integer_zerop (def))
    3932                 :           0 :         return NULL;
    3933                 :        3172 :       def2 = build_int_cst (stype,
    3934                 :        3172 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3935                 :             :     }
    3936                 :             :   else
    3937                 :             :     {
    3938                 :         111 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    3939                 :             : 
    3940                 :         111 :       if (vecstype == NULL_TREE)
    3941                 :             :         return NULL;
    3942                 :         111 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3943                 :         111 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    3944                 :         111 :       if (ext_def)
    3945                 :             :         {
    3946                 :          13 :           basic_block new_bb
    3947                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3948                 :          13 :           gcc_assert (!new_bb);
    3949                 :             :         }
    3950                 :             :       else
    3951                 :          98 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3952                 :             : 
    3953                 :         111 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3954                 :         111 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    3955                 :         111 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    3956                 :             :                                       gimple_assign_lhs (def_stmt), mask);
    3957                 :         111 :       if (ext_def)
    3958                 :             :         {
    3959                 :          13 :           basic_block new_bb
    3960                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3961                 :          13 :           gcc_assert (!new_bb);
    3962                 :             :         }
    3963                 :             :       else
    3964                 :          98 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3965                 :             :     }
    3966                 :             : 
    3967                 :        3283 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    3968                 :        6438 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    3969                 :             :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    3970                 :             :                                   oprnd0, def);
    3971                 :        3283 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3972                 :             : 
    3973                 :        3283 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    3974                 :        6438 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    3975                 :             :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    3976                 :             :                                   oprnd0, def2);
    3977                 :        3283 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3978                 :             : 
    3979                 :             :   /* Pattern detected.  */
    3980                 :        3283 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3981                 :             : 
    3982                 :             :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    3983                 :        3283 :   var = vect_recog_temp_ssa_var (utype, NULL);
    3984                 :        3283 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    3985                 :             : 
    3986                 :        3283 :   if (!useless_type_conversion_p (type, utype))
    3987                 :             :     {
    3988                 :          49 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    3989                 :          49 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    3990                 :          49 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    3991                 :             :     }
    3992                 :             :   return pattern_stmt;
    3993                 :             : }
    3994                 :             : 
    3995                 :             : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    3996                 :             :    vectorized:
    3997                 :             : 
    3998                 :             :    type a_t;
    3999                 :             :    TYPE b_T, res_T;
    4000                 :             : 
    4001                 :             :    S1 a_t = ;
    4002                 :             :    S2 b_T = ;
    4003                 :             :    S3 res_T = b_T op a_t;
    4004                 :             : 
    4005                 :             :   where type 'TYPE' is a type with different size than 'type',
    4006                 :             :   and op is <<, >> or rotate.
    4007                 :             : 
    4008                 :             :   Also detect cases:
    4009                 :             : 
    4010                 :             :    type a_t;
    4011                 :             :    TYPE b_T, c_T, res_T;
    4012                 :             : 
    4013                 :             :    S0 c_T = ;
    4014                 :             :    S1 a_t = (type) c_T;
    4015                 :             :    S2 b_T = ;
    4016                 :             :    S3 res_T = b_T op a_t;
    4017                 :             : 
    4018                 :             :   Input/Output:
    4019                 :             : 
    4020                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4021                 :             :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4022                 :             :     with a shift/rotate which has same type on both operands: in the
    4023                 :             :     second case just b_T op c_T (c_T is first ANDed with a low-bits mask
    4024                 :             :     if a_t has less precision than c_T), in the first case with an added
                         :             :     cast of a_t to TYPE in STMT_VINFO_PATTERN_DEF_SEQ.
    4025                 :             : 
    4026                 :             :   Output:
    4027                 :             : 
    4028                 :             :   * TYPE_OUT: The vector type for TYPE, the type of the pattern output.
    4029                 :             : 
    4030                 :             :   * Return value: A new stmt that will be used to replace the shift/rotate
    4031                 :             :     S3 stmt, or NULL if the pattern does not apply.  */
    4032                 :             : 
    4033                 :             : static gimple *
    4034                 :    27447598 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4035                 :             :                                         stmt_vec_info stmt_vinfo,
    4036                 :             :                                         tree *type_out)
    4037                 :             : {
    4038                 :    27447598 :   gimple *last_stmt = stmt_vinfo->stmt;
    4039                 :    27447598 :   tree oprnd0, oprnd1, lhs, var;
    4040                 :    27447598 :   gimple *pattern_stmt;
    4041                 :    27447598 :   enum tree_code rhs_code;
    4042                 :             :   /* Only assignments with a shift or rotate code are candidates.  */
    4043                 :    27447598 :   if (!is_gimple_assign (last_stmt))
    4044                 :             :     return NULL;
    4045                 :             : 
    4046                 :    18742549 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4047                 :    18742549 :   switch (rhs_code)
    4048                 :             :     {
    4049                 :      470715 :     case LSHIFT_EXPR:
    4050                 :      470715 :     case RSHIFT_EXPR:
    4051                 :      470715 :     case LROTATE_EXPR:
    4052                 :      470715 :     case RROTATE_EXPR:
    4053                 :      470715 :       break;
    4054                 :             :     default:
    4055                 :             :       return NULL;
    4056                 :             :     }
    4057                 :             :   /* Fetch the operands.  The pattern needs both to be SSA names whose
                         :             :      types have different modes, an integral OPRND0, a type of OPRND1 for
                         :             :      which type_has_mode_precision_p holds, and a result with the same
                         :             :      precision as OPRND0.  */
    4058                 :      470715 :   lhs = gimple_assign_lhs (last_stmt);
    4059                 :      470715 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4060                 :      470715 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4061                 :      470715 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4062                 :      422201 :       || TREE_CODE (oprnd1) != SSA_NAME
    4063                 :       47135 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4064                 :       18556 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4065                 :       18248 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4066                 :      488963 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4067                 :       18248 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4068                 :      452467 :     return NULL;
    4069                 :             :   /* Give up if vect_get_internal_def finds no stmt info for the shift
                         :             :      amount.  */
    4070                 :       18248 :   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
    4071                 :       18248 :   if (!def_vinfo)
    4072                 :             :     return NULL;
    4073                 :             :   /* The replacement computes a value of the type of OPRND0, so report the
                         :             :      vector type for that scalar type.  */
    4074                 :       16281 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4075                 :       16281 :   if (*type_out == NULL_TREE)
    4076                 :             :     return NULL;
    4077                 :             :   /* DEF becomes the shift amount with the mode and precision of OPRND0.
                         :             :      If the amount comes from a cast of some RHS1 that already has that
                         :             :      mode and precision, use RHS1 rather than emitting a new cast.  */
    4078                 :       11043 :   tree def = NULL_TREE;
    4079                 :       11043 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4080                 :        9277 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4081                 :             :     {
    4082                 :        1976 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4083                 :        1976 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4084                 :        1976 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4085                 :         598 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4086                 :             :         {
    4087                 :         598 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4088                 :         598 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4089                 :             :             def = rhs1;
    4090                 :             :           else /* RHS1 has more precision than OPRND1: keep only the low
                         :             :                   TYPE_PRECISION (TREE_TYPE (oprnd1)) bits of RHS1,
                         :             :                   presumably mirroring the narrowing cast.  */
    4091                 :             :             {
    4092                 :         593 :               tree mask
    4093                 :         593 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4094                 :         593 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4095                 :         593 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4096                 :         593 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4097                 :         593 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4098                 :         593 :                                                            TREE_TYPE (rhs1));
    4099                 :         593 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4100                 :             :             }
    4101                 :             :         }
    4102                 :             :     }
    4103                 :             :   /* No reusable cast source: convert the shift amount to the type of
                         :             :      OPRND0 with a new stmt in the pattern definition sequence.  */
    4104                 :         598 :   if (def == NULL_TREE)
    4105                 :             :     {
    4106                 :       10445 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4107                 :       10445 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4108                 :       10445 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4109                 :             :     }
    4110                 :             : 
    4111                 :             :   /* Pattern detected.  */
    4112                 :       11043 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4113                 :             : 
    4114                 :             :   /* Pattern supported.  Create a stmt to be used to replace the pattern:
                         :             :      the original operation applied to OPRND0 and the adjusted amount DEF.  */
    4115                 :       11043 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4116                 :       11043 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4117                 :             : 
    4118                 :       11043 :   return pattern_stmt;
    4119                 :             : }
    4120                 :             : 
    4121                 :             : /* Return true iff the target has a vector optab implementing the operation
    4122                 :             :    CODE on type VECTYPE, i.e. optab_for_tree_code yields an optab_vector
                         :             :    optab for CODE and can_implement_p accepts it for the mode of VECTYPE.  */
    4123                 :             : 
    4124                 :             : static bool
    4125                 :      532886 : target_has_vecop_for_code (tree_code code, tree vectype)
    4126                 :             : {
    4127                 :      532886 :   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
    4128                 :      532886 :   return voptab /* A null optab means there is nothing to query.  */
    4129                 :      532886 :          && can_implement_p (voptab, TYPE_MODE (vectype));
    4130                 :             : }
    4131                 :             : 
    4132                 :             : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4133                 :             :    needed by the multiplication-by-immediate synthesis algorithm described by
    4134                 :             :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    4135                 :             :    present.  Return true iff the target supports all the steps; an ALG
                         :             :    whose first step is neither alg_zero nor alg_m is rejected outright.
                         :             :    Shift steps themselves are not checked against any optab here.  */
    4136                 :             : 
    4137                 :             : static bool
    4138                 :      232507 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4139                 :             :                                  tree vectype, bool synth_shift_p)
    4140                 :             : {
    4141                 :      232507 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4142                 :             :     return false; /* The first step must be alg_zero or alg_m.  */
    4143                 :             :   /* Probe vector subtraction and addition once; both are reused below.  */
    4144                 :      232507 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4145                 :      232507 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4146                 :             :   /* The negate variant additionally needs a vector NEGATE_EXPR.  */
    4147                 :      232507 :   if (var == negate_variant
    4148                 :      232507 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4149                 :             :     return false;
    4150                 :             : 
    4151                 :             :   /* The add variant, and shifts that must be synthesized with additions,
    4152                 :             :      both require vector addition to be available.  */
    4153                 :      232089 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4154                 :             :     return false;
    4155                 :             :   /* Step 0 was validated above; check every remaining step.  */
    4156                 :      129964 :   for (int i = 1; i < alg->ops; i++)
    4157                 :             :     {
    4158                 :      101863 :       switch (alg->op[i])
    4159                 :             :         {
    4160                 :             :         case alg_shift:
    4161                 :             :           break; /* Nothing is checked for shifts here.  NOTE(review):
                         :             :                     presumably vector shift support is verified elsewhere
                         :             :                     when SYNTH_SHIFT_P is false -- confirm.  */
    4162                 :       26796 :         case alg_add_t_m2:
    4163                 :       26796 :         case alg_add_t2_m:
    4164                 :       26796 :         case alg_add_factor:
    4165                 :       26796 :           if (!supports_vplus)
    4166                 :             :             return false;
    4167                 :             :           break;
    4168                 :       17105 :         case alg_sub_t_m2:
    4169                 :       17105 :         case alg_sub_t2_m:
    4170                 :       17105 :         case alg_sub_factor:
    4171                 :       17105 :           if (!supports_vminus)
    4172                 :             :             return false;
    4173                 :             :           break;
    4174                 :             :         case alg_unknown:
    4175                 :             :         case alg_m:
    4176                 :             :         case alg_zero:
    4177                 :             :         case alg_impossible:
    4178                 :             :           return false; /* Not accepted as a non-initial step.  */
    4179                 :           0 :         default:
    4180                 :           0 :           gcc_unreachable ();
    4181                 :             :         }
    4182                 :             :     }
    4183                 :             : 
    4184                 :             :   return true;
    4185                 :             : }
    4186                 :             : 
    4187                 :             : /* Synthesize a left shift of OP by AMNT bits as AMNT successive additions of a
    4188                 :             :    value to itself, putting the final result in DEST.  Append all statements but the
    4189                 :             :    last with append_pattern_def_seq for STMT_INFO.  Return the last statement.  */
    4190                 :             : 
    4191                 :             : static gimple *
    4192                 :           0 : synth_lshift_by_additions (vec_info *vinfo,
    4193                 :             :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4194                 :             :                            stmt_vec_info stmt_info)
    4195                 :             : {
    4196                 :           0 :   HOST_WIDE_INT i;
    4197                 :           0 :   tree itype = TREE_TYPE (op);
    4198                 :           0 :   tree prev_res = op;
    4199                 :           0 :   gcc_assert (amnt >= 0);
    4200                 :           0 :   for (i = 0; i < amnt; i++)
    4201                 :             :     {
    4202                 :           0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4203                 :           0 :                       : dest;  /* Only the last addition writes DEST.  */
    4204                 :           0 :       gimple *stmt
    4205                 :           0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4206                 :           0 :       prev_res = tmp_var;
    4207                 :           0 :       if (i < amnt - 1)
    4208                 :           0 :         append_pattern_def_seq (vinfo, stmt_info, stmt);
    4209                 :             :       else
    4210                 :           0 :         return stmt;
    4211                 :             :     }
    4212                 :           0 :   gcc_unreachable ();  /* Only reached when AMNT is 0: the loop builds no statement.  */
    4213                 :             :   return NULL;
    4214                 :             : }
    4215                 :             : 
    4216                 :             : /* Helper for vect_synth_mult_by_constant.  Apply the binary operation
    4217                 :             :    CODE to operands OP1 and OP2 and return the SSA variable that holds the
    4218                 :             :    result.  Shifting by zero or adding zero emits nothing and returns OP1
    4219                 :             :    itself; otherwise the result goes into a new temporary SSA var and the
    4220                 :             :    assignment statements computing it are appended to the sequence in STMT_VINFO.
    4221                 :             :    If SYNTH_SHIFT_P is true synthesize left shifts using additions.  */
    4222                 :             : 
    4223                 :             : static tree
    4224                 :       43808 : apply_binop_and_append_stmt (vec_info *vinfo,
    4225                 :             :                              tree_code code, tree op1, tree op2,
    4226                 :             :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    4227                 :             : {
    4228                 :       43808 :   if (integer_zerop (op2)
    4229                 :       43808 :       && (code == LSHIFT_EXPR
    4230                 :       38346 :           || code == PLUS_EXPR))
    4231                 :             :     {
    4232                 :       38346 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4233                 :             :       return op1;
    4234                 :             :     }
    4235                 :             :   /* Otherwise compute the result into a fresh temporary.  */
    4236                 :        5462 :   gimple *stmt;
    4237                 :        5462 :   tree itype = TREE_TYPE (op1);
    4238                 :        5462 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4239                 :             :   /* synth_lshift_by_additions returns its final statement unappended, so append it here.  */
    4240                 :        5462 :   if (code == LSHIFT_EXPR
    4241                 :        5462 :       && synth_shift_p)
    4242                 :             :     {
    4243                 :           0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4244                 :           0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
    4245                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4246                 :           0 :       return tmp_var;
    4247                 :             :     }
    4248                 :             : 
    4249                 :        5462 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4250                 :        5462 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4251                 :        5462 :   return tmp_var;
    4252                 :             : }
    4253                 :             : 
    4254                 :             : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4255                 :             :    and simple arithmetic operations to be vectorized.  Append all statements
    4256                 :             :    but the last to the sequence in STMT_VINFO and return the last statement, or
    4257                 :             :    NULL if it's not possible to synthesize such a multiplication.
    4258                 :             :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4259                 :             :    works on tree-ssa form.  */
    4260                 :             : 
    4261                 :             : static gimple *
    4262                 :      235063 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4263                 :             :                              stmt_vec_info stmt_vinfo)
    4264                 :             : {
    4265                 :      235063 :   tree itype = TREE_TYPE (op);
    4266                 :      235063 :   machine_mode mode = TYPE_MODE (itype);
    4267                 :      235063 :   struct algorithm alg;
    4268                 :      235063 :   mult_variant variant;
    4269                 :      235063 :   if (!tree_fits_shwi_p (val))
    4270                 :             :     return NULL;  /* VAL is passed to choose_mult_variant as a HOST_WIDE_INT below.  */
    4271                 :             : 
    4272                 :             :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4273                 :             :      signed overflow where the original operation didn't.  Perform the
    4274                 :             :      operations on an unsigned type and cast back to avoid this.
    4275                 :             :      In the future we may want to relax this for synthesis algorithms
    4276                 :             :      that we can prove do not cause unexpected overflow.  */
    4277                 :      232507 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4278                 :             : 
    4279                 :       49592 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4280                 :      232507 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4281                 :      232507 :   if (!vectype)
    4282                 :             :     return NULL;
    4283                 :             : 
    4284                 :             :   /* Targets that don't support vector shifts but support vector additions
    4285                 :             :      can synthesize shifts that way.  */
    4286                 :      232507 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4287                 :             : 
    4288                 :      232507 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4289                 :             :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4290                 :             :      The vectorizer's benefit analysis will decide whether it's beneficial
    4291                 :             :      to do this.  */
    4292                 :      465014 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4293                 :      232507 :                                        ? TYPE_MODE (vectype) : mode,
    4294                 :             :                                        hwval, &alg, &variant, MAX_COST);
    4295                 :      232507 :   if (!possible)
    4296                 :             :     return NULL;
    4297                 :             : 
    4298                 :      232507 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4299                 :             :     return NULL;
    4300                 :             : 
    4301                 :       28101 :   tree accumulator;
    4302                 :             : 
    4303                 :             :   /* STMT tracks the most recently built statement; none has been built yet.  */
    4304                 :       28101 :   gimple *stmt = NULL;
    4305                 :             :   /* Do the arithmetic on a copy of OP converted to MULTTYPE.  */
    4306                 :       28101 :   if (cast_to_unsigned_p)
    4307                 :             :     {
    4308                 :       11383 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4309                 :       11383 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4310                 :       11383 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4311                 :       11383 :       op = tmp_op;
    4312                 :             :     }
    4313                 :             :   /* The first step of the algorithm selects the starting accumulator: zero or OP.  */
    4314                 :       28101 :   if (alg.op[0] == alg_zero)
    4315                 :         173 :     accumulator = build_int_cst (multtype, 0);
    4316                 :             :   else
    4317                 :             :     accumulator = op;
    4318                 :             : 
    4319                 :       28101 :   bool needs_fixup = (variant == negate_variant)
    4320                 :       28101 :                       || (variant == add_variant);
    4321                 :             :   /* Build one statement per remaining step, threading the result through ACCUMULATOR.  */
    4322                 :      129811 :   for (int i = 1; i < alg.ops; i++)
    4323                 :             :     {
    4324                 :      101710 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4325                 :      101710 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4326                 :      101710 :       tree tmp_var = NULL_TREE;
    4327                 :             : 
    4328                 :      101710 :       switch (alg.op[i])
    4329                 :             :         {
    4330                 :       57902 :         case alg_shift:
    4331                 :       57902 :           if (synth_shift_p)
    4332                 :           0 :             stmt
    4333                 :           0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4334                 :           0 :                                            alg.log[i], stmt_vinfo);
    4335                 :             :           else
    4336                 :       57902 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4337                 :             :                                          shft_log);
    4338                 :             :           break;
    4339                 :       22104 :         case alg_add_t_m2:
    4340                 :       22104 :           tmp_var
    4341                 :       22104 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4342                 :             :                                            stmt_vinfo, synth_shift_p);
    4343                 :       22104 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4344                 :             :                                        tmp_var);
    4345                 :       22104 :           break;
    4346                 :       16420 :         case alg_sub_t_m2:
    4347                 :       16420 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4348                 :             :                                                  shft_log, stmt_vinfo,
    4349                 :             :                                                  synth_shift_p);
    4350                 :             :           /* In some algorithms the first step involves zeroing the accumulator.  If
    4351                 :             :              subtracting from such an accumulator just emit the negation directly.
    4352                 :             :              NOTE(review): target_supports_mult_synth_alg only checks MINUS_EXPR for this step -- confirm NEGATE_EXPR needs no check.  */
    4353                 :       16420 :           if (integer_zerop (accumulator))
    4354                 :         173 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4355                 :             :           else
    4356                 :       16247 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4357                 :             :                                         tmp_var);
    4358                 :             :           break;
    4359                 :           0 :         case alg_add_t2_m:
    4360                 :           0 :           tmp_var
    4361                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4362                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4363                 :           0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4364                 :           0 :           break;
    4365                 :           0 :         case alg_sub_t2_m:
    4366                 :           0 :           tmp_var
    4367                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4368                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4369                 :           0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4370                 :           0 :           break;
    4371                 :        4634 :         case alg_add_factor:
    4372                 :        4634 :           tmp_var
    4373                 :        4634 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4374                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4375                 :        4634 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4376                 :             :                                        tmp_var);
    4377                 :        4634 :           break;
    4378                 :         650 :         case alg_sub_factor:
    4379                 :         650 :           tmp_var
    4380                 :         650 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4381                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4382                 :         650 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4383                 :             :                                       accumulator);
    4384                 :         650 :           break;
    4385                 :           0 :         default:
    4386                 :           0 :           gcc_unreachable ();
    4387                 :             :         }
    4388                 :             :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4389                 :             :          but rather return it directly.  A step is last only if neither a variant fixup nor a cast back follows it.  */
    4390                 :             : 
    4391                 :      101710 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4392                 :       85193 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4393                 :      101710 :       accumulator = accum_tmp;
    4394                 :             :     }
    4395                 :       28101 :   if (variant == negate_variant)
    4396                 :             :     {
    4397                 :         326 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4398                 :         326 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4399                 :         326 :       accumulator = accum_tmp;
    4400                 :         326 :       if (cast_to_unsigned_p)
    4401                 :         135 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4402                 :             :     }
    4403                 :       27775 :   else if (variant == add_variant)
    4404                 :             :     {
    4405                 :          83 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4406                 :          83 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4407                 :          83 :       accumulator = accum_tmp;
    4408                 :          83 :       if (cast_to_unsigned_p)
    4409                 :          73 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4410                 :             :     }
    4411                 :             :   /* Convert back to the original type if needed.  This conversion is then the returned statement.  */
    4412                 :       27900 :   if (cast_to_unsigned_p)
    4413                 :             :     {
    4414                 :       11383 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4415                 :       11383 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4416                 :             :     }
    4417                 :             : 
    4418                 :             :   return stmt;
    4419                 :             : }
    4420                 :             : 
    4421                 :             : /* Detect multiplication by constant and convert it into a sequence of
    4422                 :             :    shifts and additions, subtractions, negations.  We reuse the
    4423                 :             :    choose_mult_variant algorithms from expmed.cc.
    4424                 :             : 
    4425                 :             :    Input/Output:
    4426                 :             : 
    4427                 :             :    STMT_VINFO: The stmt from which the pattern search begins,
    4428                 :             :    i.e. the mult stmt.
    4429                 :             : 
    4430                 :             :    Output:
    4431                 :             : 
    4432                 :             :   * TYPE_OUT: The type of the output of this pattern.
    4433                 :             : 
    4434                 :             :   * Return value: A new stmt that will be used to replace
    4435                 :             :     the multiplication, or NULL if the pattern does not apply.  */
    4436                 :             : 
    4437                 :             : static gimple *
    4438                 :    27578497 : vect_recog_mult_pattern (vec_info *vinfo,
    4439                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4440                 :             : {
    4441                 :    27578497 :   gimple *last_stmt = stmt_vinfo->stmt;
    4442                 :    27578497 :   tree oprnd0, oprnd1, vectype, itype;
    4443                 :    27578497 :   gimple *pattern_stmt;
    4444                 :             : 
    4445                 :    27578497 :   if (!is_gimple_assign (last_stmt))
    4446                 :             :     return NULL;
    4447                 :             : 
    4448                 :    18873448 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4449                 :             :     return NULL;
    4450                 :             : 
    4451                 :     1230628 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4452                 :     1230628 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4453                 :     1230628 :   itype = TREE_TYPE (oprnd0);
    4454                 :             :   /* Only handle an SSA name times an INTEGER_CST, in an integral type for which type_has_mode_precision_p holds.  */
    4455                 :     1230628 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4456                 :     1230565 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4457                 :      725096 :       || !INTEGRAL_TYPE_P (itype)
    4458                 :     1955724 :       || !type_has_mode_precision_p (itype))
    4459                 :      505584 :     return NULL;
    4460                 :             : 
    4461                 :      725044 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4462                 :      725044 :   if (vectype == NULL_TREE)
    4463                 :             :     return NULL;
    4464                 :             : 
    4465                 :             :   /* If the target can handle vectorized multiplication natively,
    4466                 :             :      don't attempt to optimize this.  */
    4467                 :      591547 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4468                 :      591547 :   if (mul_optab != unknown_optab
    4469                 :      591547 :       && can_implement_p (mul_optab, TYPE_MODE (vectype)))
    4470                 :             :     return NULL;
    4471                 :             :   /* Otherwise try to synthesize the multiplication; NULL means it could not be done.  */
    4472                 :      235063 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4473                 :             :                                               oprnd0, oprnd1, stmt_vinfo);
    4474                 :      235063 :   if (!pattern_stmt)
    4475                 :             :     return NULL;
    4476                 :             : 
    4477                 :             :   /* Pattern detected.  */
    4478                 :       28101 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4479                 :             : 
    4480                 :       28101 :   *type_out = vectype;
    4481                 :             : 
    4482                 :       28101 :   return pattern_stmt;
    4483                 :             : }
    4484                 :             : /* Matchers for saturating add/sub/truncate forms (going by their names); only declared here.  vect_recog_sat_add_pattern reads the operand array passed as second argument after a true return.  */
    4485                 :             : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
    4486                 :             : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
    4487                 :             : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4488                 :             : /* Signed counterparts, with the same signature.  */
    4489                 :             : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
    4490                 :             : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
    4491                 :             : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
    4492                 :             : /* Build a call to internal function FN on OP_0 and OP_1 to stand in for the stmt defining LHS, setting *TYPE_OUT to the vector type for LHS.  Return the pattern stmt, or NULL if a vector type is missing or FN is unsupported.  */
    4493                 :             : static gimple *
    4494                 :         239 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
    4495                 :             :                                      internal_fn fn, tree *type_out,
    4496                 :             :                                      tree lhs, tree op_0, tree op_1)
    4497                 :             : {
    4498                 :         239 :   tree itype = TREE_TYPE (op_0);
    4499                 :         239 :   tree otype = TREE_TYPE (lhs);
    4500                 :         239 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4501                 :         239 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4502                 :             :   /* Both vector types must exist and FN must be directly supported on the operand vector type.  */
    4503                 :         239 :   if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4504                 :         239 :     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    4505                 :             :     {
    4506                 :          61 :       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
    4507                 :          61 :       tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
    4508                 :             : 
    4509                 :          61 :       gimple_call_set_lhs (call, in_ssa);
    4510                 :          61 :       gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4511                 :          61 :       gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
    4512                 :             : 
    4513                 :          61 :       *type_out = v_otype;
    4514                 :             :       /* With compatible types the call itself is the pattern stmt; otherwise queue the call and return a conversion of its result to the type of LHS.  */
    4515                 :          61 :       if (types_compatible_p (itype, otype))
    4516                 :             :         return call;
    4517                 :             :       else
    4518                 :             :         {
    4519                 :           0 :           append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
    4520                 :           0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4521                 :             : 
    4522                 :           0 :           return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
    4523                 :             :         }
    4524                 :             :     }
    4525                 :             : 
    4526                 :             :   return NULL;
    4527                 :             : }
    4528                 :             : 
    4529                 :             : /*
    4530                 :             :  * Try to detect the saturating add pattern (SAT_ADD), unsigned or signed, e.g. the gimple below:
    4531                 :             :  *   _7 = _4 + _6;
    4532                 :             :  *   _8 = _4 > _7;
    4533                 :             :  *   _9 = (long unsigned int) _8;
    4534                 :             :  *   _10 = -_9;
    4535                 :             :  *   _12 = _7 | _10;
    4536                 :             :  *
    4537                 :             :  * which is then simplified to
    4538                 :             :  *   _12 = .SAT_ADD (_4, _6);
    4539                 :             :  * Return the replacement stmt, or NULL if nothing matched or it could not be built.  */
    4540                 :             : 
    4541                 :             : static gimple *
    4542                 :    27656364 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4543                 :             :                             tree *type_out)
    4544                 :             : {
    4545                 :    27656364 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4546                 :             : 
    4547                 :    27656364 :   if (!is_gimple_assign (last_stmt))
    4548                 :             :     return NULL;
    4549                 :             : 
    4550                 :    18951315 :   tree ops[2];
    4551                 :    18951315 :   tree lhs = gimple_assign_lhs (last_stmt);
    4552                 :             :   /* OPS is read only after one of the matchers returned true.  */
    4553                 :    18951315 :   if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
    4554                 :    18951315 :       || gimple_signed_integer_sat_add (lhs, ops, NULL))
    4555                 :             :     {
    4556                 :          50 :       if (TREE_CODE (ops[1]) == INTEGER_CST)
    4557                 :          12 :         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);  /* Give a constant the type of ops[0].  */
    4558                 :             : 
    4559                 :          50 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4560                 :             :                                                           IFN_SAT_ADD, type_out,
    4561                 :             :                                                           lhs, ops[0], ops[1]);
    4562                 :          50 :       if (stmt)
    4563                 :             :         {
    4564                 :          32 :           vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
    4565                 :          32 :           return stmt;
    4566                 :             :         }
    4567                 :             :     }
    4568                 :             : 
    4569                 :             :   return NULL;
    4570                 :             : }
    4571                 :             : 
    4572                 :             : /*
    4573                 :             :  * Try to transform the truncation for .SAT_SUB pattern,  mostly occurs in
    4574                 :             :  * the benchmark zip.  Aka:
    4575                 :             :  *
    4576                 :             :  *   unsigned int _1;
    4577                 :             :  *   unsigned int _2;
    4578                 :             :  *   unsigned short int _4;
    4579                 :             :  *   _9 = (unsigned short int).SAT_SUB (_1, _2);
    4580                 :             :  *
    4581                 :             :  *   if _1 is known to be in the range of unsigned short int.  For example
    4582                 :             :  *   there is a def _1 = (unsigned short int)_4.  Then we can transform the
    4583                 :             :  *   truncation to:
    4584                 :             :  *
    4585                 :             :  *   _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
    4586                 :             :  *   _9 = .SAT_SUB (_4, _3);
    4587                 :             :  *
    4588                 :             :  *   Then we can generate better vectorized code and avoid the unnecessary
    4589                 :             :  *   narrowing stmt during vectorization, using the stmt(s) below.
    4590                 :             :  *
    4591                 :             :  *   _3 = .SAT_TRUNC(_2); // SI => HI
    4592                 :             :  *   _9 = .SAT_SUB (_4, _3);
    4593                 :             :  */
    4594                 :             : static void
    4595                 :         189 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
    4596                 :             :                                       stmt_vec_info stmt_vinfo,
    4597                 :             :                                       tree lhs, tree *ops)
    4598                 :             : {
    4599                 :         189 :   tree otype = TREE_TYPE (lhs);
    4600                 :         189 :   tree itype = TREE_TYPE (ops[0]);
    4601                 :         189 :   unsigned itype_prec = TYPE_PRECISION (itype);
    4602                 :         189 :   unsigned otype_prec = TYPE_PRECISION (otype);
    4603                 :             : 
    4604                 :         189 :   if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
    4605                 :         189 :     return;
    4606                 :             : 
    4607                 :           0 :   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4608                 :           0 :   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4609                 :           0 :   tree_pair v_pair = tree_pair (v_otype, v_itype);
    4610                 :             : 
    4611                 :           0 :   if (v_otype == NULL_TREE || v_itype == NULL_TREE
    4612                 :           0 :     || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
    4613                 :             :                                         OPTIMIZE_FOR_BOTH))
    4614                 :           0 :     return;
    4615                 :             : 
    4616                 :             :   /* 1. Find the _4 and update ops[0] as above example.  */
    4617                 :           0 :   vect_unpromoted_value unprom;
    4618                 :           0 :   tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
    4619                 :             : 
    4620                 :           0 :   if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
    4621                 :             :     return;
    4622                 :             : 
    4623                 :           0 :   ops[0] = tmp;
    4624                 :             : 
    4625                 :             :   /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example.  */
    4626                 :           0 :   tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4627                 :           0 :   gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
    4628                 :             : 
    4629                 :           0 :   gimple_call_set_lhs (call, trunc_lhs_ssa);
    4630                 :           0 :   gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4631                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
    4632                 :             : 
    4633                 :           0 :   ops[1] = trunc_lhs_ssa;
    4634                 :             : }
    4635                 :             : 
    4636                 :             : /*
    4637                 :             :  * Try to detect saturation sub pattern (SAT_SUB), aka below gimple:
    4638                 :             :  * Unsigned:
    4639                 :             :  *   _7 = _1 >= _2;
    4640                 :             :  *   _8 = _1 - _2;
    4641                 :             :  *   _10 = (long unsigned int) _7;
    4642                 :             :  *   _9 = _8 * _10;
    4643                 :             :  *
    4644                 :             :  * And then simplified to
    4645                 :             :  *   _9 = .SAT_SUB (_1, _2);
    4646                 :             :  *
    4647                 :             :  * Signed:
    4648                 :             :  *   x.0_4 = (unsigned char) x_16;
    4649                 :             :  *   y.1_5 = (unsigned char) y_18;
    4650                 :             :  *   _6 = x.0_4 - y.1_5;
    4651                 :             :  *   minus_19 = (int8_t) _6;
    4652                 :             :  *   _7 = x_16 ^ y_18;
    4653                 :             :  *   _8 = x_16 ^ minus_19;
    4654                 :             :  *   _44 = _7 < 0;
    4655                 :             :  *   _23 = x_16 < 0;
    4656                 :             :  *   _24 = (signed char) _23;
    4657                 :             :  *   _58 = (unsigned char) _24;
    4658                 :             :  *   _59 = -_58;
    4659                 :             :  *   _25 = (signed char) _59;
    4660                 :             :  *   _26 = _25 ^ 127;
    4661                 :             :  *   _42 = _8 < 0;
    4662                 :             :  *   _41 = _42 & _44;
    4663                 :             :  *   iftmp.2_11 = _41 ? _26 : minus_19;
    4664                 :             :  *
    4665                 :             :  * And then simplified to
    4666                 :             :  *   iftmp.2_11 = .SAT_SUB (x_16, y_18);
    4667                 :             :  */
    4668                 :             : 
    4669                 :             : static gimple *
    4670                 :    27656332 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4671                 :             :                             tree *type_out)
    4672                 :             : {
    4673                 :    27656332 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4674                 :             : 
    4675                 :    27656332 :   if (!is_gimple_assign (last_stmt))
    4676                 :             :     return NULL;
    4677                 :             : 
    4678                 :    18951283 :   tree ops[2];
    4679                 :    18951283 :   tree lhs = gimple_assign_lhs (last_stmt);
    4680                 :             : 
    4681                 :    18951283 :   if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
    4682                 :    18951283 :       || gimple_signed_integer_sat_sub (lhs, ops, NULL))
    4683                 :             :     {
    4684                 :         189 :       vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
    4685                 :         189 :       gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
    4686                 :             :                                                           IFN_SAT_SUB, type_out,
    4687                 :             :                                                           lhs, ops[0], ops[1]);
    4688                 :         189 :       if (stmt)
    4689                 :             :         {
    4690                 :          29 :           vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
    4691                 :          29 :           return stmt;
    4692                 :             :         }
    4693                 :             :     }
    4694                 :             : 
    4695                 :             :   return NULL;
    4696                 :             : }
    4697                 :             : 
    4698                 :             : /*
    4699                 :             :  * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
    4700                 :             :  *   overflow_5 = x_4(D) > 4294967295;
    4701                 :             :  *   _1 = (unsigned int) x_4(D);
    4702                 :             :  *   _2 = (unsigned int) overflow_5;
    4703                 :             :  *   _3 = -_2;
    4704                 :             :  *   _6 = _1 | _3;
    4705                 :             :  *
    4706                 :             :  * And then simplified to
    4707                 :             :  *   _6 = .SAT_TRUNC (x_4(D));
    4708                 :             :  */
    4709                 :             : 
    4710                 :             : static gimple *
    4711                 :    27656303 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    4712                 :             :                               tree *type_out)
    4713                 :             : {
    4714                 :    27656303 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    4715                 :             : 
    4716                 :    27656303 :   if (!is_gimple_assign (last_stmt))
    4717                 :             :     return NULL;
    4718                 :             : 
    4719                 :    18951254 :   tree ops[1];
    4720                 :    18951254 :   tree lhs = gimple_assign_lhs (last_stmt);
    4721                 :    18951254 :   tree otype = TREE_TYPE (lhs);
    4722                 :             : 
    4723                 :    18951254 :   if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    4724                 :    18951039 :        || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
    4725                 :    18951254 :       && type_has_mode_precision_p (otype))
    4726                 :             :     {
    4727                 :         203 :       tree itype = TREE_TYPE (ops[0]);
    4728                 :         203 :       tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
    4729                 :         203 :       tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
    4730                 :         203 :       internal_fn fn = IFN_SAT_TRUNC;
    4731                 :             : 
    4732                 :         197 :       if (v_itype != NULL_TREE && v_otype != NULL_TREE
    4733                 :         400 :         && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
    4734                 :             :                                            OPTIMIZE_FOR_BOTH))
    4735                 :             :         {
    4736                 :           0 :           gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
    4737                 :           0 :           tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
    4738                 :             : 
    4739                 :           0 :           gimple_call_set_lhs (call, out_ssa);
    4740                 :           0 :           gimple_call_set_nothrow (call, /* nothrow_p */ false);
    4741                 :           0 :           gimple_set_location (call, gimple_location (last_stmt));
    4742                 :             : 
    4743                 :           0 :           *type_out = v_otype;
    4744                 :             : 
    4745                 :           0 :           return call;
    4746                 :             :         }
    4747                 :             :     }
    4748                 :             : 
    4749                 :             :   return NULL;
    4750                 :             : }
    4751                 :             : 
    4752                 :             : /* Detect a signed division by a constant that wouldn't be
    4753                 :             :    otherwise vectorized:
    4754                 :             : 
    4755                 :             :    type a_t, b_t;
    4756                 :             : 
    4757                 :             :    S1 a_t = b_t / N;
    4758                 :             : 
    4759                 :             :   where type 'type' is an integral type and N is a constant.
    4760                 :             : 
    4761                 :             :   Similarly handle modulo by a constant:
    4762                 :             : 
    4763                 :             :    S4 a_t = b_t % N;
    4764                 :             : 
    4765                 :             :   Input/Output:
    4766                 :             : 
    4767                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4768                 :             :     i.e. the division stmt.  If N is a power of two constant and
    4769                 :             :     type is signed, S1 is replaced by:
    4770                 :             :   S3  y_t = b_t < 0 ? N - 1 : 0;
    4771                 :             :   S2  x_t = b_t + y_t;
    4772                 :             :   S1' a_t = x_t >> log2 (N);
    4773                 :             : 
    4774                 :             :     S4 is replaced if N is a power of two constant and
    4775                 :             :     type is signed by (where *_T temporaries have unsigned type):
    4776                 :             :   S9  y_T = b_t < 0 ? -1U : 0U;
    4777                 :             :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    4778                 :             :   S7  z_t = (type) z_T;
    4779                 :             :   S6  w_t = b_t + z_t;
    4780                 :             :   S5  x_t = w_t & (N - 1);
    4781                 :             :   S4' a_t = x_t - z_t;
    4782                 :             : 
    4783                 :             :   Output:
    4784                 :             : 
    4785                 :             :   * TYPE_OUT: The type of the output of this pattern.
    4786                 :             : 
    4787                 :             :   * Return value: A new stmt that will be used to replace the division
    4788                 :             :     S1 or modulo S4 stmt.  */
    4789                 :             : 
    4790                 :             : static gimple *
    4791                 :    27447593 : vect_recog_divmod_pattern (vec_info *vinfo,
    4792                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    4793                 :             : {
    4794                 :    27447593 :   gimple *last_stmt = stmt_vinfo->stmt;
    4795                 :    27447593 :   tree oprnd0, oprnd1, vectype, itype, cond;
    4796                 :    27447593 :   gimple *pattern_stmt, *def_stmt;
    4797                 :    27447593 :   enum tree_code rhs_code;
    4798                 :    27447593 :   optab optab;
    4799                 :    27447593 :   tree q, cst;
    4800                 :    27447593 :   int prec;
    4801                 :             : 
    4802                 :    27447593 :   if (!is_gimple_assign (last_stmt))
    4803                 :             :     return NULL;
    4804                 :             : 
    4805                 :    18742544 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4806                 :    18742544 :   switch (rhs_code)
    4807                 :             :     {
    4808                 :      245773 :     case TRUNC_DIV_EXPR:
    4809                 :      245773 :     case EXACT_DIV_EXPR:
    4810                 :      245773 :     case TRUNC_MOD_EXPR:
    4811                 :      245773 :       break;
    4812                 :             :     default:
    4813                 :             :       return NULL;
    4814                 :             :     }
    4815                 :             : 
    4816                 :      245773 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4817                 :      245773 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4818                 :      245773 :   itype = TREE_TYPE (oprnd0);
    4819                 :      245773 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4820                 :      229144 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4821                 :      136526 :       || TREE_CODE (itype) != INTEGER_TYPE
    4822                 :      382299 :       || !type_has_mode_precision_p (itype))
    4823                 :      109247 :     return NULL;
    4824                 :             : 
    4825                 :      136526 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    4826                 :      136526 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4827                 :      136526 :   if (vectype == NULL_TREE)
    4828                 :             :     return NULL;
    4829                 :             : 
    4830                 :      111437 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    4831                 :             :     {
    4832                 :             :       /* If the target can handle vectorized division or modulo natively,
    4833                 :             :          don't attempt to optimize this, since native division is likely
    4834                 :             :          to give smaller code.  */
    4835                 :        1134 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    4836                 :        1134 :       if (optab != unknown_optab
    4837                 :        1134 :           && can_implement_p (optab, TYPE_MODE (vectype)))
    4838                 :             :         return NULL;
    4839                 :             :     }
    4840                 :             : 
    4841                 :      111437 :   prec = TYPE_PRECISION (itype);
    4842                 :      111437 :   if (integer_pow2p (oprnd1))
    4843                 :             :     {
    4844                 :       53417 :       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
    4845                 :        3640 :         return NULL;
    4846                 :             : 
    4847                 :             :       /* Pattern detected.  */
    4848                 :       49777 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    4849                 :             : 
    4850                 :       49777 :       *type_out = vectype;
    4851                 :             : 
    4852                 :             :       /* Check if the target supports this internal function.  */
    4853                 :       49777 :       internal_fn ifn = IFN_DIV_POW2;
    4854                 :       49777 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    4855                 :             :         {
    4856                 :           0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    4857                 :             : 
    4858                 :           0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    4859                 :           0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    4860                 :           0 :           gimple_call_set_lhs (div_stmt, var_div);
    4861                 :             : 
    4862                 :           0 :           if (rhs_code == TRUNC_MOD_EXPR)
    4863                 :             :             {
    4864                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    4865                 :           0 :               def_stmt
    4866                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4867                 :             :                                        LSHIFT_EXPR, var_div, shift);
    4868                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4869                 :           0 :               pattern_stmt
    4870                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4871                 :             :                                        MINUS_EXPR, oprnd0,
    4872                 :             :                                        gimple_assign_lhs (def_stmt));
    4873                 :             :             }
    4874                 :             :           else
    4875                 :             :             pattern_stmt = div_stmt;
    4876                 :           0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    4877                 :             : 
    4878                 :           0 :           return pattern_stmt;
    4879                 :             :         }
    4880                 :             : 
    4881                 :       49777 :       cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    4882                 :       49777 :       def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
    4883                 :       49777 :                                       build_int_cst (itype, 0));
    4884                 :       49777 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
    4885                 :             :                               truth_type_for (vectype), itype);
    4886                 :       49777 :       if (rhs_code == TRUNC_DIV_EXPR
    4887                 :       49777 :           || rhs_code == EXACT_DIV_EXPR)
    4888                 :             :         {
    4889                 :       47183 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    4890                 :       47183 :           tree shift;
    4891                 :       47183 :           def_stmt
    4892                 :       47183 :             = gimple_build_assign (var, COND_EXPR, cond,
    4893                 :       47183 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4894                 :             :                                                 build_int_cst (itype, 1)),
    4895                 :       47183 :                                    build_int_cst (itype, 0));
    4896                 :       47183 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4897                 :       47183 :           var = vect_recog_temp_ssa_var (itype, NULL);
    4898                 :       47183 :           def_stmt
    4899                 :       47183 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    4900                 :             :                                    gimple_assign_lhs (def_stmt));
    4901                 :       47183 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4902                 :             : 
    4903                 :       47183 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    4904                 :       47183 :           pattern_stmt
    4905                 :       47183 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4906                 :             :                                    RSHIFT_EXPR, var, shift);
    4907                 :             :         }
    4908                 :             :       else
    4909                 :             :         {
    4910                 :        2594 :           tree signmask;
    4911                 :        2594 :           if (compare_tree_int (oprnd1, 2) == 0)
    4912                 :             :             {
    4913                 :        1091 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4914                 :        1091 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    4915                 :        1091 :                                               build_int_cst (itype, 1),
    4916                 :        1091 :                                               build_int_cst (itype, 0));
    4917                 :        1091 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4918                 :             :             }
    4919                 :             :           else
    4920                 :             :             {
    4921                 :        1503 :               tree utype
    4922                 :        1503 :                 = build_nonstandard_integer_type (prec, 1);
    4923                 :        1503 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    4924                 :        1503 :               tree shift
    4925                 :        1503 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    4926                 :        1503 :                                         - tree_log2 (oprnd1));
    4927                 :        1503 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    4928                 :             : 
    4929                 :        1503 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    4930                 :        1503 :                                               build_int_cst (utype, -1),
    4931                 :        1503 :                                               build_int_cst (utype, 0));
    4932                 :        1503 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4933                 :        1503 :               var = vect_recog_temp_ssa_var (utype, NULL);
    4934                 :        1503 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    4935                 :             :                                               gimple_assign_lhs (def_stmt),
    4936                 :             :                                               shift);
    4937                 :        1503 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4938                 :        1503 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4939                 :        1503 :               def_stmt
    4940                 :        1503 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    4941                 :        1503 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4942                 :             :             }
    4943                 :        2594 :           def_stmt
    4944                 :        2594 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4945                 :             :                                    PLUS_EXPR, oprnd0, signmask);
    4946                 :        2594 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4947                 :        2594 :           def_stmt
    4948                 :        2594 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4949                 :             :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    4950                 :        2594 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4951                 :             :                                                 build_int_cst (itype, 1)));
    4952                 :        2594 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4953                 :             : 
    4954                 :        2594 :           pattern_stmt
    4955                 :        2594 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4956                 :             :                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
    4957                 :             :                                    signmask);
    4958                 :             :         }
    4959                 :             : 
    4960                 :       49777 :       return pattern_stmt;
    4961                 :             :     }
    4962                 :             : 
    4963                 :       58020 :   if ((cst = uniform_integer_cst_p (oprnd1))
    4964                 :       58020 :       && TYPE_UNSIGNED (itype)
    4965                 :             :       && rhs_code == TRUNC_DIV_EXPR
    4966                 :       34088 :       && vectype
    4967                 :       80510 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    4968                 :             :     {
    4969                 :             :       /* We can use the relationship:
    4970                 :             : 
    4971                 :             :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    4972                 :             : 
    4973                 :             :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    4974                 :             :          is therefore a shift right.  When operating in modes that are
    4975                 :             :          multiples of a byte in size, there are two cases:
    4976                 :             : 
    4977                 :             :          (1) N(N+3) is not representable, in which case the question
    4978                 :             :              becomes whether the replacement expression overflows.
    4979                 :             :              It is enough to test that x+N+2 does not overflow,
    4980                 :             :              i.e. that x < MAX-(N+1).
    4981                 :             : 
    4982                 :             :          (2) N(N+3) is representable, in which case it is the (only)
    4983                 :             :              bound that we need to check.
    4984                 :             : 
    4985                 :             :          ??? For now we just handle the case where // (N+1) is a shift
    4986                 :             :          right by half the precision, since some architectures can
    4987                 :             :          optimize the associated addition and shift combinations
    4988                 :             :          into single instructions.  */
    4989                 :             : 
    4990                 :       15106 :       auto wcst = wi::to_wide (cst);
    4991                 :       15106 :       int pow = wi::exact_log2 (wcst + 1);
    4992                 :       15106 :       if (pow == prec / 2)
    4993                 :             :         {
    4994                 :         472 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    4995                 :             : 
    4996                 :         472 :           gimple_ranger ranger;
    4997                 :         472 :           int_range_max r;
    4998                 :             : 
    4999                 :             :           /* Check that no overflow will occur.  If we don't have range
    5000                 :             :              information we can't perform the optimization.  */
    5001                 :             : 
    5002                 :         472 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    5003                 :             :             {
    5004                 :         470 :               wide_int max = r.upper_bound ();
    5005                 :         470 :               wide_int one = wi::shwi (1, prec);
    5006                 :         470 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    5007                 :         470 :               wi::overflow_type ovf;
    5008                 :         470 :               wi::add (max, adder, UNSIGNED, &ovf);
    5009                 :         470 :               if (ovf == wi::OVF_NONE)
    5010                 :             :                 {
    5011                 :         305 :                   *type_out = vectype;
    5012                 :         305 :                   tree tadder = wide_int_to_tree (itype, adder);
    5013                 :         305 :                   tree rshift = wide_int_to_tree (itype, pow);
    5014                 :             : 
    5015                 :         305 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    5016                 :         305 :                   gassign *patt1
    5017                 :         305 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    5018                 :         305 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5019                 :             : 
    5020                 :         305 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    5021                 :         305 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    5022                 :             :                                                rshift);
    5023                 :         305 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5024                 :             : 
    5025                 :         305 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    5026                 :         305 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    5027                 :             :                                                oprnd0);
    5028                 :         305 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    5029                 :             : 
    5030                 :         305 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    5031                 :         305 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    5032                 :             :                                                       new_lhs3, rshift);
    5033                 :             : 
    5034                 :         305 :                   return pattern_stmt;
    5035                 :             :                 }
    5036                 :         470 :             }
    5037                 :         472 :         }
    5038                 :             :     }
    5039                 :             : 
    5040                 :       57715 :   if (prec > HOST_BITS_PER_WIDE_INT
    5041                 :       57715 :       || integer_zerop (oprnd1))
    5042                 :         500 :     return NULL;
    5043                 :             : 
    5044                 :       57215 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    5045                 :             :     return NULL;
    5046                 :             : 
    5047                 :       15257 :   if (TYPE_UNSIGNED (itype))
    5048                 :             :     {
    5049                 :       10190 :       unsigned HOST_WIDE_INT mh, ml;
    5050                 :       10190 :       int pre_shift, post_shift;
    5051                 :       10190 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    5052                 :       10190 :                                   & GET_MODE_MASK (itype_mode));
    5053                 :       10190 :       tree t1, t2, t3, t4;
    5054                 :             : 
    5055                 :       10190 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    5056                 :             :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    5057                 :          22 :         return NULL;
    5058                 :             : 
    5059                 :             :       /* Find a suitable multiplier and right shift count instead of
    5060                 :             :          directly dividing by D.  */
    5061                 :       10168 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
    5062                 :             : 
    5063                 :             :       /* If the suggested multiplier is more than PREC bits, we can do better
    5064                 :             :          for even divisors, using an initial right shift.  */
    5065                 :       10168 :       if (mh != 0 && (d & 1) == 0)
    5066                 :             :         {
    5067                 :         374 :           pre_shift = ctz_or_zero (d);
    5068                 :         374 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    5069                 :             :                                   &ml, &post_shift);
    5070                 :         374 :           gcc_assert (!mh);
    5071                 :             :         }
    5072                 :             :       else
    5073                 :             :         pre_shift = 0;
    5074                 :             : 
    5075                 :         686 :       if (mh != 0)
    5076                 :             :         {
    5077                 :         686 :           if (post_shift - 1 >= prec)
    5078                 :             :             return NULL;
    5079                 :             : 
    5080                 :             :           /* t1 = oprnd0 h* ml;
    5081                 :             :              t2 = oprnd0 - t1;
    5082                 :             :              t3 = t2 >> 1;
    5083                 :             :              t4 = t1 + t3;
    5084                 :             :              q = t4 >> (post_shift - 1);  */
    5085                 :         686 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    5086                 :         686 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5087                 :         686 :                                           build_int_cst (itype, ml));
    5088                 :         686 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5089                 :             : 
    5090                 :         686 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5091                 :         686 :           def_stmt
    5092                 :         686 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    5093                 :         686 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5094                 :             : 
    5095                 :         686 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5096                 :         686 :           def_stmt
    5097                 :         686 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    5098                 :         686 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5099                 :             : 
    5100                 :         686 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5101                 :         686 :           def_stmt
    5102                 :         686 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    5103                 :             : 
    5104                 :         686 :           if (post_shift != 1)
    5105                 :             :             {
    5106                 :         686 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5107                 :             : 
    5108                 :         686 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5109                 :         686 :               pattern_stmt
    5110                 :         686 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    5111                 :         686 :                                        build_int_cst (itype, post_shift - 1));
    5112                 :             :             }
    5113                 :             :           else
    5114                 :             :             {
    5115                 :             :               q = t4;
    5116                 :             :               pattern_stmt = def_stmt;
    5117                 :             :             }
    5118                 :             :         }
    5119                 :             :       else
    5120                 :             :         {
    5121                 :        9482 :           if (pre_shift >= prec || post_shift >= prec)
    5122                 :             :             return NULL;
    5123                 :             : 
    5124                 :             :           /* t1 = oprnd0 >> pre_shift;
    5125                 :             :              t2 = t1 h* ml;
    5126                 :             :              q = t2 >> post_shift;  */
    5127                 :        9482 :           if (pre_shift)
    5128                 :             :             {
    5129                 :         374 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    5130                 :         374 :               def_stmt
    5131                 :         374 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    5132                 :         374 :                                        build_int_cst (NULL, pre_shift));
    5133                 :         374 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5134                 :             :             }
    5135                 :             :           else
    5136                 :             :             t1 = oprnd0;
    5137                 :             : 
    5138                 :        9482 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5139                 :        9482 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    5140                 :        9482 :                                           build_int_cst (itype, ml));
    5141                 :             : 
    5142                 :        9482 :           if (post_shift)
    5143                 :             :             {
    5144                 :        9476 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5145                 :             : 
    5146                 :        9476 :               q = vect_recog_temp_ssa_var (itype, NULL);
    5147                 :        9476 :               def_stmt
    5148                 :        9476 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    5149                 :        9476 :                                        build_int_cst (itype, post_shift));
    5150                 :             :             }
    5151                 :             :           else
    5152                 :             :             q = t2;
    5153                 :             : 
    5154                 :             :           pattern_stmt = def_stmt;
    5155                 :             :         }
    5156                 :             :     }
    5157                 :             :   else
    5158                 :             :     {
    5159                 :        5067 :       unsigned HOST_WIDE_INT ml;
    5160                 :        5067 :       int post_shift;
    5161                 :        5067 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    5162                 :        5067 :       unsigned HOST_WIDE_INT abs_d;
    5163                 :        5067 :       bool add = false;
    5164                 :        5067 :       tree t1, t2, t3, t4;
    5165                 :             : 
    5166                 :             :       /* Give up for -1.  */
    5167                 :        5067 :       if (d == -1)
    5168                 :           0 :         return NULL;
    5169                 :             : 
    5170                 :             :       /* Since d might be INT_MIN, we have to cast to
    5171                 :             :          unsigned HOST_WIDE_INT before negating to avoid
    5172                 :             :          undefined signed overflow.  */
    5173                 :        5067 :       abs_d = (d >= 0
    5174                 :        5067 :                ? (unsigned HOST_WIDE_INT) d
    5175                 :             :                : - (unsigned HOST_WIDE_INT) d);
    5176                 :             : 
    5177                 :             :       /* n rem d = n rem -d */
    5178                 :        5067 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    5179                 :             :         {
    5180                 :           0 :           d = abs_d;
    5181                 :           0 :           oprnd1 = build_int_cst (itype, abs_d);
    5182                 :             :         }
    5183                 :        5067 :       if (HOST_BITS_PER_WIDE_INT >= prec
    5184                 :        5067 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    5185                 :             :         /* This case is not handled correctly below.  */
    5186                 :             :         return NULL;
    5187                 :             : 
    5188                 :        5067 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
    5189                 :        5067 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    5190                 :             :         {
    5191                 :        1448 :           add = true;
    5192                 :        1448 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    5193                 :             :         }
    5194                 :        5067 :       if (post_shift >= prec)
    5195                 :             :         return NULL;
    5196                 :             : 
    5197                 :             :       /* t1 = oprnd0 h* ml;  */
    5198                 :        5067 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5199                 :        5067 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    5200                 :        5067 :                                       build_int_cst (itype, ml));
    5201                 :             : 
    5202                 :        5067 :       if (add)
    5203                 :             :         {
    5204                 :             :           /* t2 = t1 + oprnd0;  */
    5205                 :        1448 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5206                 :        1448 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    5207                 :        1448 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    5208                 :             :         }
    5209                 :             :       else
    5210                 :             :         t2 = t1;
    5211                 :             : 
    5212                 :        5067 :       if (post_shift)
    5213                 :             :         {
    5214                 :             :           /* t3 = t2 >> post_shift;  */
    5215                 :        4437 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5216                 :        4437 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    5217                 :        4437 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    5218                 :        4437 :                                           build_int_cst (itype, post_shift));
    5219                 :             :         }
    5220                 :             :       else
    5221                 :             :         t3 = t2;
    5222                 :             : 
    5223                 :        5067 :       int msb = 1;
    5224                 :        5067 :       int_range_max r;
    5225                 :       10134 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    5226                 :        5067 :       if (!r.varying_p () && !r.undefined_p ())
    5227                 :             :         {
    5228                 :        2994 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    5229                 :             :             msb = 0;
    5230                 :         688 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    5231                 :             :             msb = -1;
    5232                 :             :         }
    5233                 :             : 
    5234                 :        2306 :       if (msb == 0 && d >= 0)
    5235                 :             :         {
    5236                 :             :           /* q = t3;  */
    5237                 :             :           q = t3;
    5238                 :             :           pattern_stmt = def_stmt;
    5239                 :             :         }
    5240                 :             :       else
    5241                 :             :         {
    5242                 :             :           /* t4 = oprnd0 >> (prec - 1);
    5243                 :             :              or if we know from VRP that oprnd0 >= 0
    5244                 :             :              t4 = 0;
    5245                 :             :              or if we know from VRP that oprnd0 < 0
    5246                 :             :              t4 = -1;  */
    5247                 :        2827 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5248                 :        2827 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    5249                 :        2827 :           if (msb != 1)
    5250                 :          74 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5251                 :          74 :                                             build_int_cst (itype, msb));
    5252                 :             :           else
    5253                 :        2753 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5254                 :        2753 :                                             build_int_cst (itype, prec - 1));
    5255                 :        2827 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5256                 :             : 
    5257                 :             :           /* q = t3 - t4;  or q = t4 - t3;  */
    5258                 :        2827 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5259                 :        5493 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5260                 :             :                                               d < 0 ? t3 : t4);
    5261                 :             :         }
    5262                 :        5067 :     }
    5263                 :             : 
    5264                 :       15235 :   if (rhs_code == TRUNC_MOD_EXPR)
    5265                 :             :     {
    5266                 :        6121 :       tree r, t1;
    5267                 :             : 
    5268                 :             :       /* We divided.  Now finish by:
    5269                 :             :          t1 = q * oprnd1;
    5270                 :             :          r = oprnd0 - t1;  */
    5271                 :        6121 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5272                 :             : 
    5273                 :        6121 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5274                 :        6121 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5275                 :        6121 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5276                 :             : 
    5277                 :        6121 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5278                 :        6121 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5279                 :             :     }
    5280                 :             : 
    5281                 :             :   /* Pattern detected.  */
    5282                 :       15235 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5283                 :             : 
    5284                 :       15235 :   *type_out = vectype;
    5285                 :       15235 :   return pattern_stmt;
    5286                 :             : }
    5287                 :             : 
    5288                 :             : /* Detects pattern with a modulo operation (S1) where both arguments
    5289                 :             :    are variables of integral type.
    5290                 :             :    The statement is replaced by division, multiplication, and subtraction.
    5291                 :             :    The last statement (S4) is returned.
    5292                 :             : 
    5293                 :             :    Example:
    5294                 :             :    S1 c_t = a_t % b_t;
    5295                 :             : 
    5296                 :             :    is replaced by
    5297                 :             :    S2 x_t = a_t / b_t;
    5298                 :             :    S3 y_t = x_t * b_t;
    5299                 :             :    S4 z_t = a_t - y_t;  */
    5300                 :             : 
    5301                 :             : static gimple *
    5302                 :    27578497 : vect_recog_mod_var_pattern (vec_info *vinfo,
    5303                 :             :                             stmt_vec_info stmt_vinfo, tree *type_out)
    5304                 :             : {
    5305                 :    27578497 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5306                 :    27578497 :   tree oprnd0, oprnd1, vectype, itype;
    5307                 :    27578497 :   gimple *pattern_stmt, *def_stmt;
    5308                 :    27578497 :   enum tree_code rhs_code;
    5309                 :             : 
                                           /* Only an assignment whose RHS code is TRUNC_MOD_EXPR can match.  */
    5310                 :    27578497 :   if (!is_gimple_assign (last_stmt))
    5311                 :             :     return NULL;
    5312                 :             : 
    5313                 :    18873448 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5314                 :    18873448 :   if (rhs_code != TRUNC_MOD_EXPR)
    5315                 :             :     return NULL;
    5316                 :             : 
                                           /* Both operands must be SSA names and the type of the first
                                              operand must be an INTEGER_TYPE; anything else is rejected.  */
    5317                 :       68085 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    5318                 :       68085 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    5319                 :       68085 :   itype = TREE_TYPE (oprnd0);
    5320                 :       68085 :   if (TREE_CODE (oprnd0) != SSA_NAME
    5321                 :       60284 :       || TREE_CODE (oprnd1) != SSA_NAME
    5322                 :       42628 :       || TREE_CODE (itype) != INTEGER_TYPE)
    5323                 :             :     return NULL;
    5324                 :             : 
    5325                 :       42524 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    5326                 :             : 
                                           /* Give up when there is no vector type, when
                                              target_has_vecop_for_code already reports a vector
                                              TRUNC_MOD_EXPR (no replacement is built in that case), or when
                                              it does not report each of the division, multiplication and
                                              subtraction that the replacement sequence below uses.  */
    5327                 :       42524 :   if (!vectype
    5328                 :       33564 :       || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
    5329                 :       33564 :       || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
    5330                 :           0 :       || !target_has_vecop_for_code (MULT_EXPR, vectype)
    5331                 :       42524 :       || !target_has_vecop_for_code (MINUS_EXPR, vectype))
    5332                 :       42524 :     return NULL;
    5333                 :             : 
                                           /* q = oprnd0 / oprnd1;  added to the pattern definition
                                              sequence.  */
    5334                 :           0 :   tree q, tmp, r;
    5335                 :           0 :   q = vect_recog_temp_ssa_var (itype, NULL);
    5336                 :           0 :   def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
    5337                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5338                 :             : 
                                           /* tmp = q * oprnd1;  added to the pattern definition sequence.  */
    5339                 :           0 :   tmp = vect_recog_temp_ssa_var (itype, NULL);
    5340                 :           0 :   def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
    5341                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
    5342                 :             : 
                                           /* r = oprnd0 - tmp;  this final statement is returned to the
                                              caller instead of being appended to the sequence.  */
    5343                 :           0 :   r = vect_recog_temp_ssa_var (itype, NULL);
    5344                 :           0 :   pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
    5345                 :             : 
    5346                 :             :   /* Pattern detected.  */
    5347                 :           0 :   *type_out = vectype;
    5348                 :           0 :   vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
    5349                 :             : 
    5350                 :           0 :   return pattern_stmt;
    5351                 :             : }
    5352                 :             : 
    5353                 :             : 
    5354                 :             : /* Return the proper type for converting bool VAR into
    5355                 :             :    an integer value or NULL_TREE if no such type exists.
    5356                 :             :    The type is chosen so that the converted value has the
    5357                 :             :    same number of elements as VAR's vector type.  */
    5358                 :             : 
    5359                 :             : static tree
    5360                 :     3224109 : integer_type_for_mask (tree var, vec_info *vinfo)
    5361                 :             : {
                                           /* Only values of a scalar boolean type (as tested by
                                              VECT_SCALAR_BOOLEAN_TYPE_P) have a mask integer type.  */
    5362                 :     3224109 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5363                 :             :     return NULL_TREE;
    5364                 :             : 
                                           /* VAR needs a definition known to VINFO for which
                                              vect_use_mask_type_p holds; otherwise no type is returned.  */
    5365                 :     1136719 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    5366                 :     1136719 :   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    5367                 :             :     return NULL_TREE;
    5368                 :             : 
                                           /* Build an integer type whose precision is the mask precision
                                              recorded on the defining statement.  NOTE(review): the second
                                              argument 1 is presumably the "unsigned" flag -- confirm against
                                              build_nonstandard_integer_type.  */
    5369                 :      606722 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5370                 :             : }
    5371                 :             : 
    5372                 :             : /* Function vect_recog_gcond_pattern
    5373                 :             : 
    5374                 :             :    Try to find pattern like following:
    5375                 :             : 
    5376                 :             :      if (a op b)
    5377                 :             : 
    5378                 :             :    where operator 'op' is not != and convert it to an adjusted boolean pattern
    5379                 :             : 
    5380                 :             :      mask = a op b
    5381                 :             :      if (mask != 0)
    5382                 :             : 
    5383                 :             :    and set the mask type on MASK.
    5384                 :             : 
    5385                 :             :    Input:
    5386                 :             : 
    5387                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5388                 :             :                  search begins, i.e. the conditional branch
    5389                 :             :                  (gcond) to be rewritten.
    5390                 :             : 
    5391                 :             :    Output:
    5392                 :             : 
    5393                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5394                 :             : 
    5395                 :             :    * Return value: A new stmt that will be used to replace the pattern.  */
    5396                 :             : 
    5397                 :             : static gimple *
    5398                 :    27656303 : vect_recog_gcond_pattern (vec_info *vinfo,
    5399                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5400                 :             : {
    5401                 :             :   /* Currently we only support this for loop vectorization, and only when
    5402                 :             :      LOOP_VINFO_EARLY_BREAKS is set for the loop.  */
    5403                 :    27656303 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5404                 :     3160762 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5405                 :             :     return NULL;
    5406                 :             : 
                                           /* The statement itself must be a conditional branch (gcond).  */
    5407                 :     1005576 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5408                 :     1005576 :   gcond* cond = NULL;
    5409                 :    27671641 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5410                 :             :     return NULL;
    5411                 :             : 
    5412                 :      255713 :   auto lhs = gimple_cond_lhs (cond);
    5413                 :      255713 :   auto rhs = gimple_cond_rhs (cond);
    5414                 :      255713 :   auto code = gimple_cond_code (cond);
    5415                 :             : 
                                           /* Comparisons whose left operand already has a vector type are
                                              not handled.  */
    5416                 :      255713 :   tree scalar_type = TREE_TYPE (lhs);
    5417                 :      255713 :   if (VECTOR_TYPE_P (scalar_type))
    5418                 :             :     return NULL;
    5419                 :             : 
    5420                 :             :   /* If the input is a boolean then try to figure out the precision that the
    5421                 :             :      vector type should use.  We cannot use the scalar precision as this would
    5422                 :             :      later mismatch.  This is similar to what recog_bool does.  */
    5423                 :      255713 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5424                 :             :     {
                                               /* Keep the original boolean type when integer_type_for_mask
                                                  has no answer (it returns NULL_TREE).  */
    5425                 :       10483 :       if (tree stype = integer_type_for_mask (lhs, vinfo))
    5426                 :      255713 :         scalar_type = stype;
    5427                 :             :     }
    5428                 :             : 
                                           /* The mask vector type is derived from the (possibly adjusted)
                                              scalar type; without one the pattern cannot be applied.  */
    5429                 :      255713 :   tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
    5430                 :      255713 :   if (vectype == NULL_TREE)
    5431                 :             :     return NULL;
    5432                 :             : 
                                           /* mask = lhs CODE rhs;  added to the pattern definition sequence
                                              together with VECTYPE and SCALAR_TYPE.  */
    5433                 :      240375 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5434                 :      240375 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5435                 :      240375 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5436                 :             : 
                                           /* Replacement branch: if (mask != 0).  This is the statement
                                              returned to the caller.  */
    5437                 :      240375 :   gimple *pattern_stmt
    5438                 :      240375 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5439                 :      240375 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5440                 :             :                          NULL_TREE, NULL_TREE);
    5441                 :      240375 :   *type_out = vectype;
    5442                 :      240375 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5443                 :      240375 :   return pattern_stmt;
    5444                 :             : }
    5445                 :             : 
    5446                 :             : /* Function vect_recog_bool_pattern
    5447                 :             : 
    5448                 :             :    Try to find pattern like following:
    5449                 :             : 
    5450                 :             :      bool a_b, b_b, c_b, d_b, e_b;
    5451                 :             :      TYPE f_T;
    5452                 :             :    loop:
    5453                 :             :      S1  a_b = x1 CMP1 y1;
    5454                 :             :      S2  b_b = x2 CMP2 y2;
    5455                 :             :      S3  c_b = a_b & b_b;
    5456                 :             :      S4  d_b = x3 CMP3 y3;
    5457                 :             :      S5  e_b = c_b | d_b;
    5458                 :             :      S6  f_T = (TYPE) e_b;
    5459                 :             : 
    5460                 :             :    where type 'TYPE' is an integral type.  Or a similar pattern
    5461                 :             :    ending in
    5462                 :             : 
    5463                 :             :      S6  f_Y = e_b ? r_Y : s_Y;
    5464                 :             : 
    5465                 :             :    as results from if-conversion of a complex condition.
    5466                 :             : 
    5467                 :             :    Input:
    5468                 :             : 
    5469                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5470                 :             :                  search begins, i.e. a cast of a bool to an integer
    5471                 :             :                  type, a COND_EXPR on a bool, or a bool store.
    5472                 :             : 
    5473                 :             :    Output:
    5474                 :             : 
    5475                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5476                 :             : 
    5477                 :             :    * Return value: A new stmt that will be used to replace the pattern.
    5478                 :             : 
    5479                 :             :         Assuming size of TYPE is the same as size of all comparisons
    5480                 :             :         (otherwise some casts would be added where needed), for the above
    5481                 :             :         sequence we create related pattern stmts:
    5482                 :             :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5483                 :             :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5484                 :             :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5485                 :             :         S5'  e_T = c_T | d_T;
    5486                 :             :         S6'  f_T = e_T;
    5487                 :             : 
    5488                 :             :         Instead of the above S3' we could emit:
    5489                 :             :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5490                 :             :         S3'  c_T = a_T & b_T;
    5491                 :             :         but the above is more efficient.  */
    5492                 :             : 
    5493                 :             : static gimple *
    5494                 :    27656303 : vect_recog_bool_pattern (vec_info *vinfo,
    5495                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5496                 :             : {
    5497                 :    27656303 :   gimple *last_stmt = stmt_vinfo->stmt;
    5498                 :    27656303 :   enum tree_code rhs_code;
    5499                 :    27656303 :   tree var, lhs, rhs, vectype;
    5500                 :    27656303 :   gimple *pattern_stmt;
    5501                 :             : 
    5502                 :    27656303 :   if (!is_gimple_assign (last_stmt))
    5503                 :             :     return NULL;
    5504                 :             : 
    5505                 :    19191629 :   var = gimple_assign_rhs1 (last_stmt);
    5506                 :    19191629 :   lhs = gimple_assign_lhs (last_stmt);
    5507                 :    19191629 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5508                 :             : 
    5509                 :    19191629 :   if (rhs_code == VIEW_CONVERT_EXPR)
    5510                 :      167460 :     var = TREE_OPERAND (var, 0);
    5511                 :             : 
    5512                 :    19191629 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5513                 :             :     return NULL;
    5514                 :             : 
    5517                 :      666513 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    5518                 :      577839 :       || rhs_code == VIEW_CONVERT_EXPR)
    5519                 :             :     {
    5520                 :      184152 :       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    5521                 :      184021 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    5522                 :             :         return NULL;
    5523                 :       88594 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5524                 :             : 
    5525                 :       88594 :       tree type = integer_type_for_mask (var, vinfo);
    5526                 :       88594 :       tree cst0, cst1, tmp;
    5527                 :             : 
    5528                 :       88594 :       if (!type)
    5529                 :             :         return NULL;
    5530                 :             : 
    5531                 :             :       /* We may directly use cond with narrowed type to avoid multiple cond
    5532                 :             :          exprs with following result packing and perform single cond with
    5533                 :             :          packed mask instead.  In case of widening we better make cond first
    5534                 :             :          and then extract results.  */
    5535                 :       40661 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    5536                 :       28279 :         type = TREE_TYPE (lhs);
    5537                 :             : 
    5538                 :       40661 :       cst0 = build_int_cst (type, 0);
    5539                 :       40661 :       cst1 = build_int_cst (type, 1);
    5540                 :       40661 :       tmp = vect_recog_temp_ssa_var (type, NULL);
    5541                 :       40661 :       pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    5542                 :             : 
    5543                 :       40661 :       if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    5544                 :             :         {
    5545                 :       12382 :           tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5546                 :       12382 :           append_pattern_def_seq (vinfo, stmt_vinfo,
    5547                 :             :                                   pattern_stmt, new_vectype);
    5548                 :             : 
    5549                 :       12382 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5550                 :       12382 :           pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
    5551                 :             :         }
    5552                 :             : 
    5553                 :       40661 :       *type_out = vectype;
    5554                 :       40661 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5555                 :             : 
    5556                 :       40661 :       return pattern_stmt;
    5557                 :             :     }
    5558                 :      574050 :   else if (rhs_code == COND_EXPR
    5559                 :      144861 :            && TREE_CODE (var) == SSA_NAME)
    5560                 :             :     {
    5561                 :      144861 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5562                 :      144861 :       if (vectype == NULL_TREE)
    5563                 :             :         return NULL;
    5564                 :             : 
    5565                 :             :       /* Build a scalar type for the boolean result that when
    5566                 :             :          vectorized matches the vector type of the result in
    5567                 :             :          size and number of elements.  */
    5568                 :      132954 :       unsigned prec
    5569                 :      132954 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    5570                 :             :                                TYPE_VECTOR_SUBPARTS (vectype));
    5571                 :             : 
    5572                 :      132954 :       tree type
    5573                 :      265908 :         = build_nonstandard_integer_type (prec,
    5574                 :      132954 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    5575                 :      132954 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    5576                 :             :         return NULL;
    5577                 :             : 
    5578                 :      132954 :       enum vect_def_type dt;
    5579                 :      132954 :       if (integer_type_for_mask (var, vinfo))
    5580                 :             :         return NULL;
    5581                 :       26189 :       else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
    5582                 :       26189 :                && vect_is_simple_use (var, vinfo, &dt)
    5583                 :       26189 :                && (dt == vect_external_def
    5584                 :       26182 :                    || dt == vect_constant_def))
    5585                 :             :         {
    5586                 :             :           /* If the condition is already a boolean then manually convert it to a
    5587                 :             :              mask of the given integer type but don't set a vectype.  */
    5588                 :        1338 :           tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
    5589                 :        1338 :           pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
    5590                 :             :                                               build_all_ones_cst (type),
    5591                 :             :                                               build_zero_cst (type));
    5592                 :        1338 :           append_inv_pattern_def_seq (vinfo, pattern_stmt);
    5593                 :        1338 :           var = lhs_ivar;
    5594                 :             :         }
    5595                 :             : 
    5596                 :       26189 :       tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5597                 :       26189 :       pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
    5598                 :       26189 :                                           build_zero_cst (TREE_TYPE (var)));
    5599                 :             : 
    5600                 :       26189 :       tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
    5601                 :       26189 :       if (!new_vectype)
    5602                 :             :         return NULL;
    5603                 :             : 
    5604                 :       26189 :       new_vectype = truth_type_for (new_vectype);
    5605                 :       26189 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
    5606                 :       26189 :                               TREE_TYPE (var));
    5607                 :             : 
    5608                 :       26189 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5609                 :       26189 :       pattern_stmt
    5610                 :       26189 :         = gimple_build_assign (lhs, COND_EXPR, lhs_var,
    5611                 :             :                                gimple_assign_rhs2 (last_stmt),
    5612                 :             :                                gimple_assign_rhs3 (last_stmt));
    5613                 :       26189 :       *type_out = vectype;
    5614                 :       26189 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5615                 :             : 
    5616                 :       26189 :       return pattern_stmt;
    5617                 :             :     }
    5618                 :      429189 :   else if (rhs_code == SSA_NAME
    5619                 :       22541 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    5620                 :             :     {
    5621                 :        7768 :       stmt_vec_info pattern_stmt_info;
    5622                 :        7768 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5623                 :        7768 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    5624                 :           0 :         return NULL;
    5625                 :             : 
    5626                 :        7768 :       tree type = integer_type_for_mask (var, vinfo);
    5627                 :        7768 :       tree cst0, cst1, new_vectype;
    5628                 :             : 
    5629                 :        7768 :       if (!type)
    5630                 :             :         return NULL;
    5631                 :             : 
    5632                 :        4451 :       if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
    5633                 :         553 :         type = TREE_TYPE (vectype);
    5634                 :             : 
    5635                 :        4451 :       cst0 = build_int_cst (type, 0);
    5636                 :        4451 :       cst1 = build_int_cst (type, 1);
    5637                 :        4451 :       new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5638                 :             : 
    5639                 :        4451 :       rhs = vect_recog_temp_ssa_var (type, NULL);
    5640                 :        4451 :       pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    5641                 :        4451 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
    5642                 :             : 
    5643                 :        4451 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    5644                 :        4451 :       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    5645                 :             :         {
    5646                 :        3898 :           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5647                 :        3898 :           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
    5648                 :        3898 :           append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
    5649                 :        3898 :           rhs = rhs2;
    5650                 :             :         }
    5651                 :        4451 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    5652                 :        4451 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5653                 :        4451 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5654                 :        4451 :       *type_out = vectype;
    5655                 :        4451 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5656                 :             : 
    5657                 :        4451 :       return pattern_stmt;
    5658                 :             :     }
    5659                 :             :   else
    5660                 :             :     return NULL;
    5661                 :      666513 : }
    5662                 :             : 
    5663                 :             : /* A helper for vect_recog_mask_conversion_pattern.  Build a
    5664                 :             :    conversion of MASK to the element type of the truth vector type
    5665                 :             :    of VECTYPE.  The built statement gets that truth vector type as
    5666                 :             :    its vectype and is appended to the pattern sequence of STMT_VINFO.
    5667                 :             : 
    5668                 :             :    Return the temporary that holds the converted mask.  */
    5669                 :             : 
    5670                 :             : static tree
    5671                 :       46781 : build_mask_conversion (vec_info *vinfo,
    5672                 :             :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5673                 :             : {
    5674                 :       46781 :   gimple *stmt;
    5675                 :       46781 :   tree masktype, tmp;
    5676                 :             : 
    5677                 :       46781 :   masktype = truth_type_for (vectype);
    5678                 :       46781 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5679                 :       46781 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5680                 :       46781 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5681                 :       46781 :                           stmt, masktype, TREE_TYPE (vectype));
    5682                 :             : 
    5683                 :       46781 :   return tmp;
    5684                 :             : }
    5685                 :             : 
    5686                 :             : 
    5687                 :             : /* Function vect_recog_mask_conversion_pattern
    5688                 :             : 
    5689                 :             :    Try to find statements which require boolean type
    5690                 :             :    conversion.  Additional conversion statements are
    5691                 :             :    added to handle such cases.  For example:
    5692                 :             : 
    5693                 :             :    bool m_1, m_2, m_3;
    5694                 :             :    int i_4, i_5;
    5695                 :             :    double d_6, d_7;
    5696                 :             :    char c_1, c_2, c_3;
    5697                 :             : 
    5698                 :             :    S1   m_1 = i_4 > i_5;
    5699                 :             :    S2   m_2 = d_6 < d_7;
    5700                 :             :    S3   m_3 = m_1 & m_2;
    5701                 :             :    S4   c_1 = m_3 ? c_2 : c_3;
    5702                 :             : 
    5703                 :             :    Will be transformed into:
    5704                 :             : 
    5705                 :             :    S1   m_1 = i_4 > i_5;
    5706                 :             :    S2   m_2 = d_6 < d_7;
    5707                 :             :    S3'' m_2' = (_Bool[bitsize=32])m_2
    5708                 :             :    S3'  m_3' = m_1 & m_2';
    5709                 :             :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    5710                 :             :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    5711                 :             : 
    5712                 :             : static gimple *
    5713                 :    27632001 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    5714                 :             :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    5715                 :             : {
    5716                 :    27632001 :   gimple *last_stmt = stmt_vinfo->stmt;
    5717                 :    27632001 :   enum tree_code rhs_code;
    5718                 :    27632001 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    5719                 :    27632001 :   tree vectype1, vectype2;
    5720                 :    27632001 :   stmt_vec_info pattern_stmt_info;
    5721                 :             : 
    5722                 :             :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    5723                 :             :      conversion.  */
    5724                 :    27632001 :   if (is_gimple_call (last_stmt)
    5725                 :    27632001 :       && gimple_call_internal_p (last_stmt))
    5726                 :             :     {
    5727                 :       87936 :       gcall *pattern_stmt;
    5728                 :             : 
    5729                 :       87936 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    5730                 :       87936 :       int mask_argno = internal_fn_mask_index (ifn);
    5731                 :       87936 :       if (mask_argno < 0)
    5732                 :             :         return NULL;
    5733                 :             : 
    5734                 :        8759 :       bool store_p = internal_store_fn_p (ifn);
    5735                 :        8759 :       bool load_p = internal_load_fn_p (ifn);
    5736                 :        8759 :       if (store_p)
    5737                 :             :         {
    5738                 :        1712 :           int rhs_index = internal_fn_stored_value_index (ifn);
    5739                 :        1712 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    5740                 :        1712 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    5741                 :             :         }
    5742                 :             :       else
    5743                 :             :         {
    5744                 :        7047 :           lhs = gimple_call_lhs (last_stmt);
    5745                 :        7047 :           if (!lhs)
    5746                 :             :             return NULL;
    5747                 :        7047 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5748                 :             :         }
    5749                 :             : 
    5750                 :        8759 :       if (!vectype1)
    5751                 :             :         return NULL;
    5752                 :             : 
    5753                 :        8630 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    5754                 :        8630 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    5755                 :        8630 :       if (mask_arg_type)
    5756                 :             :         {
    5757                 :        7592 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    5758                 :             : 
    5759                 :        7592 :           if (!vectype2
    5760                 :        7592 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    5761                 :             :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    5762                 :        4454 :             return NULL;
    5763                 :             :         }
    5764                 :        1038 :       else if (store_p || load_p)
    5765                 :             :         return NULL;
    5766                 :             : 
    5767                 :        3977 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    5768                 :             : 
    5769                 :        3977 :       auto_vec<tree, 8> args;
    5770                 :        3977 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    5771                 :        3977 :       args.safe_grow (nargs, true);
    5772                 :       19885 :       for (unsigned int i = 0; i < nargs; ++i)
    5773                 :       15908 :         args[i] = ((int) i == mask_argno
    5774                 :       15908 :                    ? tmp
    5775                 :       11931 :                    : gimple_call_arg (last_stmt, i));
    5776                 :        3977 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    5777                 :             : 
    5778                 :        3977 :       if (!store_p)
    5779                 :             :         {
    5780                 :        3596 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5781                 :        3596 :           gimple_call_set_lhs (pattern_stmt, lhs);
    5782                 :             :         }
    5783                 :             : 
    5784                 :        3596 :       if (load_p || store_p)
    5785                 :         381 :         gimple_call_set_nothrow (pattern_stmt, true);
    5786                 :             : 
    5787                 :        3977 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5788                 :        3977 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    5789                 :        1729 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5790                 :             : 
    5791                 :        3977 :       *type_out = vectype1;
    5792                 :        3977 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5793                 :             : 
    5794                 :        3977 :       return pattern_stmt;
    5795                 :        3977 :     }
    5796                 :             : 
    5797                 :    27544065 :   if (!is_gimple_assign (last_stmt))
    5798                 :             :     return NULL;
    5799                 :             : 
    5800                 :    19167327 :   gimple *pattern_stmt;
    5801                 :    19167327 :   lhs = gimple_assign_lhs (last_stmt);
    5802                 :    19167327 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    5803                 :    19167327 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5804                 :             : 
    5805                 :             :   /* Check for cond expression requiring mask conversion.  */
    5806                 :    19167327 :   if (rhs_code == COND_EXPR)
    5807                 :             :     {
    5808                 :      135604 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5809                 :             : 
    5810                 :      135604 :       gcc_assert (! COMPARISON_CLASS_P (rhs1));
    5811                 :      135604 :       if (TREE_CODE (rhs1) == SSA_NAME)
    5812                 :             :         {
    5813                 :      135604 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    5814                 :      135604 :           if (!rhs1_type)
    5815                 :             :             return NULL;
    5816                 :             :         }
    5817                 :             :       else
    5818                 :             :         return NULL;
    5819                 :             : 
    5820                 :      125272 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    5821                 :             : 
    5822                 :      125272 :       if (!vectype1 || !vectype2)
    5823                 :             :         return NULL;
    5824                 :             : 
    5825                 :             :       /* Continue if a conversion is needed.  Also continue if we have
    5826                 :             :          a comparison whose vector type would normally be different from
    5827                 :             :          VECTYPE2 when considered in isolation.  In that case we'll
    5828                 :             :          replace the comparison with an SSA name (so that we can record
    5829                 :             :          its vector type) and behave as though the comparison was an SSA
    5830                 :             :          name from the outset.  */
    5831                 :      123677 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    5832                 :             :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    5833                 :             :         return NULL;
    5834                 :             : 
    5835                 :       27275 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    5836                 :       54550 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    5837                 :       27275 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    5838                 :             :       else
    5839                 :             :         tmp = rhs1;
    5840                 :             : 
    5841                 :       27275 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5842                 :       27275 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    5843                 :             :                                           gimple_assign_rhs2 (last_stmt),
    5844                 :             :                                           gimple_assign_rhs3 (last_stmt));
    5845                 :             : 
    5846                 :       27275 :       *type_out = vectype1;
    5847                 :       27275 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5848                 :             : 
    5849                 :       27275 :       return pattern_stmt;
    5850                 :             :     }
    5851                 :             : 
    5852                 :             :   /* Now check for binary boolean operations requiring conversion for
    5853                 :             :      one of operands.  */
    5854                 :    19031723 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    5855                 :             :     return NULL;
    5856                 :             : 
    5857                 :     1532061 :   if (rhs_code != BIT_IOR_EXPR
    5858                 :             :       && rhs_code != BIT_XOR_EXPR
    5859                 :     1532061 :       && rhs_code != BIT_AND_EXPR
    5860                 :     1193681 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    5861                 :             :     return NULL;
    5862                 :             : 
    5863                 :     1420038 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    5864                 :             : 
    5865                 :     1420038 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    5866                 :     1420038 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    5867                 :             : 
    5868                 :     1420038 :   if (!rhs1_type || !rhs2_type
    5869                 :     1420038 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    5870                 :             :     return NULL;
    5871                 :             : 
    5872                 :       15529 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    5873                 :             :     {
    5874                 :       10028 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    5875                 :       10028 :       if (!vectype1)
    5876                 :             :         return NULL;
    5877                 :       10028 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    5878                 :             :     }
    5879                 :             :   else
    5880                 :             :     {
    5881                 :        5501 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    5882                 :        5501 :       if (!vectype1)
    5883                 :             :         return NULL;
    5884                 :        5501 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    5885                 :             :     }
    5886                 :             : 
    5887                 :       15529 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5888                 :       15529 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    5889                 :             : 
    5890                 :       15529 :   *type_out = vectype1;
    5891                 :       15529 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5892                 :             : 
    5893                 :       15529 :   return pattern_stmt;
    5894                 :             : }
    5895                 :             : 
    5896                 :             : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    5897                 :             :    the boolean condition under which it occurs, otherwise return null.  */
    5898                 :             : 
    5899                 :             : static tree
    5900                 :       31800 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    5901                 :             : {
    5902                 :       31800 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    5903                 :             :     {
    5904                 :       30369 :       gcc_assert (gimple_assign_single_p (def_assign));
    5905                 :             :       return NULL_TREE;  /* A plain assignment carries no mask operand.  */
    5906                 :             :     }
    5907                 :             : 
    5908                 :        1431 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    5909                 :             :     {
    5910                 :        1431 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    5911                 :        1431 :       int mask_index = internal_fn_mask_index (ifn);  /* NOTE(review): used unchecked; assumes IFN always has a mask argument -- confirm.  */
    5912                 :        1431 :       return gimple_call_arg (def_call, mask_index);
    5913                 :             :     }
    5914                 :             : 
    5915                 :           0 :   gcc_unreachable ();  /* Only assignments and calls are expected here.  */
    5916                 :             : }
    5917                 :             : 
    5918                 :             : /* Return MASK if MASK is suitable for masking an operation on vectors
    5919                 :             :    of type VECTYPE, otherwise convert it into such a form and return
    5920                 :             :    the result.  Associate any conversion statements with STMT_INFO's
    5921                 :             :    pattern.  */
    5922                 :             : 
    5923                 :             : static tree
    5924                 :           0 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    5925                 :             :                                stmt_vec_info stmt_info, vec_info *vinfo)
    5926                 :             : {
    5927                 :           0 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    5928                 :           0 :   if (mask_type)  /* Without a mask type, MASK is returned unchanged.  */
    5929                 :             :     {
    5930                 :           0 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    5931                 :           0 :       if (mask_vectype  /* Convert only if the element counts may differ.  */
    5932                 :           0 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    5933                 :           0 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    5934                 :           0 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    5935                 :             :     }
    5936                 :           0 :   return mask;
    5937                 :             : }
    5938                 :             : 
    5939                 :             : /* Return the equivalent of:
    5940                 :             : 
    5941                 :             :      fold_convert (TYPE, VALUE)
    5942                 :             : 
    5943                 :             :    with the expectation that the operation will be vectorized.
    5944                 :             :    If new statements are needed, add them as pattern statements
    5945                 :             :    to STMT_INFO.  */
    5946                 :             : 
    5947                 :             : static tree
    5948                 :           0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    5949                 :             :                                 tree type, tree value, stmt_vec_info stmt_info)
    5950                 :             : {
    5951                 :           0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    5952                 :             :     return value;  /* No conversion statement is needed.  */
    5953                 :             : 
    5954                 :           0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    5955                 :           0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    5956                 :           0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,  /* Queue the conversion on STMT_INFO's pattern sequence.  */
    5957                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    5958                 :           0 :   return new_value;
    5959                 :             : }
    5960                 :             : 
    5961                 :             : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    5962                 :             :    internal function.  Return the final statement on success and set
    5963                 :             :    *TYPE_OUT to the vector type being loaded or stored.
    5964                 :             : 
    5965                 :             :    This function only handles gathers and scatters that were recognized
    5966                 :             :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
    5967                 :             : 
    5968                 :             : static gimple *
    5969                 :    27632001 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    5970                 :             :                                    stmt_vec_info stmt_info, tree *type_out)
    5971                 :             : {
    5972                 :             :   /* Currently we only support this for loop vectorization.  */
    5973                 :    30792234 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5974                 :     3160233 :   if (!loop_vinfo)
    5975                 :             :     return NULL;
    5976                 :             : 
    5977                 :             :   /* Make sure that we're looking at a gather load or scatter store.  */
    5978                 :     3160233 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    5979                 :     3160233 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    5980                 :             :     return NULL;
    5981                 :             : 
    5982                 :             :   /* Get the boolean that controls whether the load or store happens.
    5983                 :             :      This is null if the operation is unconditional.  */
    5984                 :       31800 :   tree mask = vect_get_load_store_mask (stmt_info);
    5985                 :             : 
    5986                 :             :   /* Make sure that the target supports an appropriate internal
    5987                 :             :      function for the gather/scatter operation.  */
    5988                 :       31800 :   gather_scatter_info gs_info;
    5989                 :       31800 :   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
    5990                 :       31800 :       || gs_info.ifn == IFN_LAST)
    5991                 :             :     return NULL;
    5992                 :             : 
    5993                 :             :   /* Convert the mask to the right form.  */
    5994                 :           0 :   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
    5995                 :             :                                                  gs_info.element_type);
    5996                 :           0 :   if (mask)
    5997                 :           0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    5998                 :             :                                           loop_vinfo);
    5999                 :           0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6000                 :           0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6001                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6002                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6003                 :           0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);  /* Unconditional access through a masked ifn: use a constant -1 mask.  */
    6004                 :             : 
    6005                 :             :   /* Get the invariant base and non-invariant offset, converting the
    6006                 :             :      latter to the same width as the vector elements.  */
    6007                 :           0 :   tree base = gs_info.base;
    6008                 :           0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6009                 :           0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6010                 :             :                                                 gs_info.offset, stmt_info);
    6011                 :             : 
    6012                 :             :   /* Build the new pattern statement.  */
    6013                 :           0 :   tree scale = size_int (gs_info.scale);
    6014                 :           0 :   gcall *pattern_stmt;
    6015                 :             : 
    6016                 :           0 :   if (DR_IS_READ (dr))
    6017                 :             :     {
    6018                 :           0 :       tree zero = build_zero_cst (gs_info.element_type);
    6019                 :           0 :       if (mask != NULL)
    6020                 :             :         {
    6021                 :           0 :           int elsval = MASK_LOAD_ELSE_ZERO;
    6022                 :             : 
    6023                 :           0 :           tree vec_els
    6024                 :           0 :             = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
    6025                 :           0 :           pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,  /* Masked load: mask and else value are appended.  */
    6026                 :             :                                                      offset, scale, zero, mask,
    6027                 :             :                                                      vec_els);
    6028                 :             :         }
    6029                 :             :       else
    6030                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
    6031                 :             :                                                    offset, scale, zero);
    6032                 :           0 :       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
    6033                 :           0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6034                 :             :     }
    6035                 :             :   else
    6036                 :             :     {
    6037                 :           0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6038                 :           0 :       if (mask != NULL)
    6039                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,  /* Masked store: mask is the last argument.  */
    6040                 :             :                                                    base, offset, scale, rhs,
    6041                 :             :                                                    mask);
    6042                 :             :       else
    6043                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
    6044                 :             :                                                    base, offset, scale, rhs);
    6045                 :             :     }
    6046                 :           0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6047                 :             : 
    6048                 :             :   /* Copy across relevant vectorization info and associate DR with the
    6049                 :             :      new pattern statement instead of the original statement.  */
    6050                 :           0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6051                 :           0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6052                 :             : 
    6053                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);  /* NOTE(review): taken from STMT_INFO's recorded vectype, not GS_VECTYPE.  */
    6054                 :           0 :   *type_out = vectype;
    6055                 :           0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6056                 :             : 
    6057                 :           0 :   return pattern_stmt;
    6058                 :             : }
    6059                 :             : 
    6060                 :             : /* Helper function of vect_recog_cond_store_pattern.  Check whether COND_ARG
    6061                 :             :    is defined by a load statement that reads the same data reference as the
    6062                 :             :    one accessed by STORE_VINFO.  */
    6063                 :             : 
    6064                 :             : static bool
    6065                 :       27473 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
    6066                 :             :                                   stmt_vec_info store_vinfo, tree cond_arg)
    6067                 :             : {
    6068                 :       27473 :   stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
    6069                 :       27473 :   if (!load_stmt_vinfo  /* COND_ARG has no definition known to VINFO.  */
    6070                 :       15536 :       || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
    6071                 :       10426 :       || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
    6072                 :       37899 :       || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
    6073                 :             :                           STMT_VINFO_DATA_REF (load_stmt_vinfo)))
    6074                 :       19662 :     return false;
    6075                 :             : 
    6076                 :             :   return true;
    6077                 :             : }
    6078                 :             : 
    6079                 :             : /* Function vect_recog_cond_store_pattern
    6080                 :             : 
    6081                 :             :    Try to find the following pattern:
    6082                 :             : 
    6083                 :             :    x = *_3;
    6084                 :             :    c = a CMP b;
    6085                 :             :    y = c ? t_20 : x;
    6086                 :             :    *_3 = y;
    6087                 :             : 
    6088                 :             :    where the store of _3 happens on a conditional select on a value loaded
    6089                 :             :    from the same location.  In such case we can elide the initial load if
    6090                 :             :    MASK_STORE is supported and instead only conditionally write out the result.
    6091                 :             : 
    6092                 :             :    The pattern produces for the above:
    6093                 :             : 
    6094                 :             :    c = a CMP b;
    6095                 :             :    .MASK_STORE (_3, c, t_20)
    6096                 :             : 
    6097                 :             :    Input:
    6098                 :             : 
    6099                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    6100                 :             :    example, the search begins when this function is called with *_3 = y.
    6101                 :             : 
    6102                 :             :    Output:
    6103                 :             : 
    6104                 :             :    * TYPE_OUT: The type of the output of this pattern.
    6105                 :             : 
    6106                 :             :    * Return value: A new stmt that will be used to replace the sequence.  */
    6107                 :             : 
    6108                 :             : static gimple *
    6109                 :    27632001 : vect_recog_cond_store_pattern (vec_info *vinfo,
    6110                 :             :                                stmt_vec_info stmt_vinfo, tree *type_out)
    6111                 :             : {
    6112                 :    27632001 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6113                 :     3160233 :   if (!loop_vinfo)
    6114                 :             :     return NULL;
    6115                 :             : 
    6116                 :     3160233 :   gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
    6117                 :             : 
    6118                 :             :   /* Needs to be a gimple store for which we have DR info.  */
    6119                 :     3160233 :   if (!STMT_VINFO_DATA_REF (stmt_vinfo)
    6120                 :      747674 :       || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
    6121                 :     3423960 :       || !gimple_store_p (store_stmt))
    6122                 :     2898218 :     return NULL;
    6123                 :             : 
    6124                 :      262015 :   tree st_rhs = gimple_assign_rhs1 (store_stmt);
    6125                 :             : 
    6126                 :      262015 :   if (TREE_CODE (st_rhs) != SSA_NAME)
    6127                 :             :     return NULL;
    6128                 :             : 
    6129                 :      213857 :   auto cond_vinfo = vinfo->lookup_def (st_rhs);
    6130                 :             : 
    6131                 :             :   /* If the condition isn't part of the loop then bool recog wouldn't have seen
    6132                 :             :      it and so this transformation may not be valid.  */
    6133                 :      213857 :   if (!cond_vinfo)
    6134                 :             :     return NULL;
    6135                 :             : 
    6136                 :      198772 :   cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
    6137                 :    27822084 :   gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
    6138                 :      246028 :   if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
    6139                 :             :     return NULL;
    6140                 :             : 
    6141                 :             :   /* Check if the else value matches the original loaded one.  */
    6142                 :       14388 :   bool invert = false;
    6143                 :       14388 :   tree cmp_ls = gimple_arg (cond_stmt, 0);
    6144                 :       14388 :   if (TREE_CODE (cmp_ls) != SSA_NAME)
    6145                 :             :     return NULL;
    6146                 :             : 
    6147                 :       14388 :   tree cond_arg1 = gimple_arg (cond_stmt, 1);
    6148                 :       14388 :   tree cond_arg2 = gimple_arg (cond_stmt, 2);
    6149                 :             : 
    6150                 :       14388 :   if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)  /* INVERT is set when the first arm, not the second, is the reload.  */
    6151                 :       14388 :       && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
    6152                 :             :                                                       cond_arg1)))
    6153                 :             :     return NULL;
    6154                 :             : 
    6155                 :        7811 :   vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);  /* NOTE(review): logged before the checks below, which can still return NULL.  */
    6156                 :             : 
    6157                 :        7811 :   tree scalar_type = TREE_TYPE (st_rhs);
    6158                 :        7811 :   if (VECTOR_TYPE_P (scalar_type))
    6159                 :             :     return NULL;
    6160                 :             : 
    6161                 :        7811 :   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    6162                 :        7811 :   if (vectype == NULL_TREE)
    6163                 :             :     return NULL;
    6164                 :             : 
    6165                 :        7811 :   machine_mode mask_mode;
    6166                 :        7811 :   machine_mode vecmode = TYPE_MODE (vectype);
    6167                 :        1644 :   if (!VECTOR_MODE_P (vecmode)  /* Give up unless the target has a usable, inexpensive masked store.  */
    6168                 :        7811 :       || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
    6169                 :           0 :       || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
    6170                 :        7811 :       || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
    6171                 :        7811 :     return NULL;
    6172                 :             : 
    6173                 :           0 :   tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
    6174                 :           0 :   if (may_be_nonaddressable_p (base))
    6175                 :             :     return NULL;
    6176                 :             : 
    6177                 :             :   /* The value to store is the arm that is not the reloaded value.  */
    6178                 :           0 :   tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
    6179                 :           0 :   tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
    6180                 :           0 :   gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
    6181                 :             : 
    6182                 :             :   /* This is a rough estimation to check that there aren't any aliasing stores
    6183                 :             :      in between the load and store.  It's a bit strict, but for now it's good
    6184                 :             :      enough.  */
    6185                 :           0 :   if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
    6186                 :             :     return NULL;
    6187                 :             : 
    6188                 :             :   /* If we have to invert the condition, i.e. use the true argument rather than
    6189                 :             :      the false argument, we have to negate the mask.  */
    6190                 :           0 :   if (invert)
    6191                 :             :     {
    6192                 :           0 :       tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    6193                 :             : 
    6194                 :             :       /* Invert the mask using ^ 1.  */
    6195                 :           0 :       tree itype = TREE_TYPE (cmp_ls);
    6196                 :           0 :       gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
    6197                 :           0 :                                            build_int_cst (itype, 1));
    6198                 :             : 
    6199                 :           0 :       tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
    6200                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
    6201                 :           0 :       cmp_ls= var;
    6202                 :             :     }
    6203                 :             : 
    6204                 :           0 :   if (TREE_CODE (base) != MEM_REF)
    6205                 :           0 :    base = build_fold_addr_expr (base);
    6206                 :             : 
    6207                 :           0 :   tree ptr = build_int_cst (reference_alias_ptr_type (base),  /* Constant of the alias pointer type whose value is the alignment.  */
    6208                 :           0 :                             get_object_alignment (base));
    6209                 :             : 
    6210                 :             :   /* Convert the mask to the right form.  */
    6211                 :           0 :   tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
    6212                 :             :                                              vinfo);
    6213                 :             : 
    6214                 :           0 :   gcall *call
    6215                 :           0 :     = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
    6216                 :             :                                   cond_store_arg);
    6217                 :           0 :   gimple_set_location (call, gimple_location (store_stmt));
    6218                 :             : 
    6219                 :             :   /* Copy across relevant vectorization info and associate DR with the
    6220                 :             :      new pattern statement instead of the original statement.  */
    6221                 :           0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
    6222                 :           0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    6223                 :             : 
    6224                 :           0 :   *type_out = vectype;
    6225                 :           0 :   return call;
    6226                 :             : }
    6227                 :             : 
    6228                 :             : /* Return true if TYPE is an integral type (INTEGRAL_TYPE_P) that is not a
    6229                 :             :    vectorizer scalar boolean.  These are the types considered for narrowing.  */
    6230                 :             : 
    6231                 :             : static bool
    6232                 :    55813308 : vect_narrowable_type_p (tree type)
    6233                 :             : {
    6234                 :    55813308 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6235                 :             : }
    6236                 :             : 
    6237                 :             : /* Return true if the operation given by CODE can be truncated to N bits
    6238                 :             :    when only N bits of the output are needed.  This is only true if bit N+1
    6239                 :             :    of the inputs has no effect on the low N bits of the result.  */
    6240                 :             : 
    6241                 :             : static bool
    6242                 :    13510952 : vect_truncatable_operation_p (tree_code code)
    6243                 :             : {
    6244                 :    13510952 :   switch (code)
    6245                 :             :     {
    6246                 :             :     case NEGATE_EXPR:
    6247                 :             :     case PLUS_EXPR:
    6248                 :             :     case MINUS_EXPR:
    6249                 :             :     case MULT_EXPR:
    6250                 :             :     case BIT_NOT_EXPR:
    6251                 :             :     case BIT_AND_EXPR:
    6252                 :             :     case BIT_IOR_EXPR:
    6253                 :             :     case BIT_XOR_EXPR:
    6254                 :             :     case COND_EXPR:
    6255                 :             :       return true;
    6256                 :             : 
    6257                 :     5252323 :     default:  /* Every code not listed above is treated as not truncatable.  */
    6258                 :     5252323 :       return false;
    6259                 :             :     }
    6260                 :             : }
    6261                 :             : 
    6262                 :             : /* Record that STMT_INFO could be changed from operating on TYPE to
    6263                 :             :    operating on a type with the precision and sign given by PRECISION
    6264                 :             :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6265                 :             :    it might not be a whole number of bytes.  */
    6266                 :             : 
    6267                 :             : static void
    6268                 :     1892921 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6269                 :             :                          unsigned int precision, signop sign)
    6270                 :             : {
    6271                 :             :   /* Round the precision up to a whole number of bytes.  */
    6272                 :     1892921 :   precision = vect_element_precision (precision);
    6273                 :     1892921 :   if (precision < TYPE_PRECISION (type)
    6274                 :     1892921 :       && (!stmt_info->operation_precision
    6275                 :       30371 :           || stmt_info->operation_precision > precision))
    6276                 :             :     {  /* Narrower than TYPE, and nothing recorded yet or the recorded precision is wider.  */
    6277                 :     1188378 :       stmt_info->operation_precision = precision;
    6278                 :     1188378 :       stmt_info->operation_sign = sign;
    6279                 :             :     }
    6280                 :     1892921 : }
    6281                 :             : 
    6282                 :             : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6283                 :             :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6284                 :             :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    6285                 :             : 
    6286                 :             : static void
    6287                 :     9753320 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6288                 :             :                               unsigned int min_input_precision)
    6289                 :             : {
    6290                 :             :   /* This operation in isolation only requires the inputs to have
    6291                 :             :      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
    6292                 :             :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6293                 :             :      as a whole.  E.g. consider something like:
    6294                 :             : 
    6295                 :             :          unsigned short *x, *y;
    6296                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6297                 :             : 
    6298                 :             :      The right shift can be done on unsigned chars, and only requires the
    6299                 :             :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6300                 :             :      approach would mean turning a natural chain of single-vector unsigned
    6301                 :             :      short operations into one that truncates "*x" and then extends
    6302                 :             :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6303                 :             :      operation and one vector for each unsigned char operation.
    6304                 :             :      This would be a significant pessimization.
    6305                 :             : 
    6306                 :             :      Instead only propagate the maximum of this precision and the precision
    6307                 :             :      required by the users of the result.  This means that we don't pessimize
    6308                 :             :      the case above but continue to optimize things like:
    6309                 :             : 
    6310                 :             :          unsigned char *y;
    6311                 :             :          unsigned short *x;
    6312                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6313                 :             : 
    6314                 :             :      Here we would truncate two vectors of *x to a single vector of
    6315                 :             :      unsigned chars and use single-vector unsigned char operations for
    6316                 :             :      everything else, rather than doing two unsigned short copies of
    6317                 :             :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6318                 :     9753320 :   min_input_precision = MAX (min_input_precision,
    6319                 :             :                              stmt_info->min_output_precision);
    6320                 :             : 
    6321                 :     9753320 :   if (min_input_precision < TYPE_PRECISION (type)
    6322                 :     9753320 :       && (!stmt_info->min_input_precision
    6323                 :       50302 :           || stmt_info->min_input_precision > min_input_precision))
    6324                 :      508553 :     stmt_info->min_input_precision = min_input_precision;
    6325                 :     9753320 : }
    6326                 :             : 
    6327                 :             : /* Subroutine of vect_determine_min_output_precision.  Return true if
    6328                 :             :    we can calculate a reduced number of output bits for STMT_INFO,
    6329                 :             :    whose result is LHS.  */
    6330                 :             : 
    6331                 :             : static bool
    6332                 :    13084078 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6333                 :             :                                        stmt_vec_info stmt_info, tree lhs)
    6334                 :             : {
    6335                 :             :   /* Take the maximum precision required by users of the result.  */
    6336                 :    13084078 :   unsigned int precision = 0;
    6337                 :    13084078 :   imm_use_iterator iter;
    6338                 :    13084078 :   use_operand_p use;
    6339                 :    13825161 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6340                 :             :     {
    6341                 :    13576259 :       gimple *use_stmt = USE_STMT (use);
    6342                 :    13576259 :       if (is_gimple_debug (use_stmt))
    6343                 :      483451 :         continue;
    6344                 :    13092808 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6345                 :    13092808 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6346                 :             :         return false;
    6347                 :             :       /* The input precision recorded for COND_EXPRs applies only to the
    6348                 :             :          "then" and "else" values.  */
    6349                 :      258091 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6350                 :      221334 :       if (assign
    6351                 :      221334 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6352                 :         459 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6353                 :         459 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6354                 :             :         return false;
    6355                 :      257632 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6356                 :             :     }
    6357                 :             : 
    6358                 :      248902 :   if (dump_enabled_p ())
    6359                 :        6619 :     dump_printf_loc (MSG_NOTE, vect_location,
    6360                 :             :                      "only the low %d bits of %T are significant\n",
    6361                 :             :                      precision, lhs);
    6362                 :      248902 :   stmt_info->min_output_precision = precision;
    6363                 :      248902 :   return true;
    6364                 :             : }
    6365                 :             : 
    6366                 :             : /* Calculate min_output_precision for STMT_INFO.  */
    6367                 :             : 
    6368                 :             : static void
    6369                 :    33978850 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6370                 :             : {
    6371                 :             :   /* We're only interested in statements with a narrowable result.  */
    6372                 :    33978850 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6373                 :    33978850 :   if (!lhs
    6374                 :    26688313 :       || TREE_CODE (lhs) != SSA_NAME
    6375                 :    56398890 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6376                 :             :     return;
    6377                 :             : 
    6378                 :    13084078 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6379                 :    12835176 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6380                 :             : }
    6381                 :             : 
    6382                 :             : /* Use range information to decide whether STMT (described by STMT_INFO)
    6383                 :             :    could be done in a narrower type.  This is effectively a forward
    6384                 :             :    propagation, since it uses context-independent information that applies
    6385                 :             :    to all users of an SSA name.  */
    6386                 :             : 
    6387                 :             : static void
    6388                 :    18691700 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6389                 :             : {
    6390                 :    18691700 :   tree lhs = gimple_assign_lhs (stmt);
    6391                 :    18691700 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6392                 :    16892334 :     return;
    6393                 :             : 
    6394                 :    14701568 :   tree type = TREE_TYPE (lhs);
    6395                 :    14701568 :   if (!vect_narrowable_type_p (type))
    6396                 :             :     return;
    6397                 :             : 
    6398                 :             :   /* First see whether we have any useful range information for the result.  */
    6399                 :     9882957 :   unsigned int precision = TYPE_PRECISION (type);
    6400                 :     9882957 :   signop sign = TYPE_SIGN (type);
    6401                 :     9882957 :   wide_int min_value, max_value;
    6402                 :     9882957 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6403                 :             :     return;
    6404                 :             : 
    6405                 :     4476758 :   tree_code code = gimple_assign_rhs_code (stmt);
    6406                 :     4476758 :   unsigned int nops = gimple_num_ops (stmt);
    6407                 :             : 
    6408                 :     4476758 :   if (!vect_truncatable_operation_p (code))
    6409                 :             :     {
    6410                 :             :       /* Handle operations that can be computed in type T if all inputs
    6411                 :             :          and outputs can be represented in type T.  Also handle left and
    6412                 :             :          right shifts, where (in addition) the maximum shift amount must
    6413                 :             :          be less than the number of bits in T.  */
    6414                 :     1679696 :       bool is_shift;
    6415                 :     1679696 :       switch (code)
    6416                 :             :         {
    6417                 :             :         case LSHIFT_EXPR:
    6418                 :             :         case RSHIFT_EXPR:
    6419                 :             :           is_shift = true;
    6420                 :             :           break;
    6421                 :             : 
    6422                 :      201983 :         case ABS_EXPR:
    6423                 :      201983 :         case MIN_EXPR:
    6424                 :      201983 :         case MAX_EXPR:
    6425                 :      201983 :         case TRUNC_DIV_EXPR:
    6426                 :      201983 :         case CEIL_DIV_EXPR:
    6427                 :      201983 :         case FLOOR_DIV_EXPR:
    6428                 :      201983 :         case ROUND_DIV_EXPR:
    6429                 :      201983 :         case EXACT_DIV_EXPR:
    6430                 :             :           /* Modulus is excluded because it is typically calculated by doing
    6431                 :             :              a division, for which minimum signed / -1 isn't representable in
    6432                 :             :              the original signed type.  We could take the division range into
    6433                 :             :              account instead, if handling modulus ever becomes important.  */
    6434                 :      201983 :           is_shift = false;
    6435                 :      201983 :           break;
    6436                 :             : 
    6437                 :             :         default:
    6438                 :             :           return;
    6439                 :             :         }
    6440                 :     1101964 :       for (unsigned int i = 1; i < nops; ++i)
    6441                 :             :         {
    6442                 :      847861 :           tree op = gimple_op (stmt, i);
    6443                 :      847861 :           wide_int op_min_value, op_max_value;
    6444                 :      847861 :           if (TREE_CODE (op) == INTEGER_CST)
    6445                 :             :             {
    6446                 :      259268 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6447                 :      259268 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6448                 :             :             }
    6449                 :      588593 :           else if (TREE_CODE (op) == SSA_NAME)
    6450                 :             :             {
    6451                 :      588593 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    6452                 :             :                 return;
    6453                 :             :             }
    6454                 :             :           else
    6455                 :             :             return;
    6456                 :             : 
    6457                 :      567155 :           if (is_shift && i == 2)
    6458                 :             :             {
    6459                 :             :               /* There needs to be one more bit than the maximum shift amount.
    6460                 :             : 
    6461                 :             :                  If the maximum shift amount is already 1 less than PRECISION
    6462                 :             :                  then we can't narrow the shift further.  Dealing with that
    6463                 :             :                  case first ensures that we can safely use an unsigned range
    6464                 :             :                  below.
    6465                 :             : 
    6466                 :             :                  op_min_value isn't relevant, since shifts by negative amounts
    6467                 :             :                  are UB.  */
    6468                 :      168175 :               if (wi::geu_p (op_max_value, precision - 1))
    6469                 :             :                 return;
    6470                 :      152083 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    6471                 :             : 
    6472                 :             :               /* As explained below, we can convert a signed shift into an
    6473                 :             :                  unsigned shift if the sign bit is always clear.  At this
    6474                 :             :                  point we've already processed the ranges of the output and
    6475                 :             :                  the first input.  */
    6476                 :      152083 :               auto op_sign = sign;
    6477                 :      152083 :               if (sign == SIGNED && !wi::neg_p (min_value))
    6478                 :             :                 op_sign = UNSIGNED;
    6479                 :      304166 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    6480                 :      152083 :                                              precision, op_sign);
    6481                 :      304166 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    6482                 :      152083 :                                              precision, op_sign);
    6483                 :             :             }
    6484                 :      551063 :           min_value = wi::min (min_value, op_min_value, sign);
    6485                 :      551063 :           max_value = wi::max (max_value, op_max_value, sign);
    6486                 :      847861 :         }
    6487                 :             :     }
    6488                 :             : 
    6489                 :             :   /* Try to switch signed types for unsigned types if we can.
    6490                 :             :      This is better for two reasons.  First, unsigned ops tend
    6491                 :             :      to be cheaper than signed ops.  Second, it means that we can
    6492                 :             :      handle things like:
    6493                 :             : 
    6494                 :             :         signed char c;
    6495                 :             :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    6496                 :             : 
    6497                 :             :      as:
    6498                 :             : 
    6499                 :             :         signed char c;
    6500                 :             :         unsigned short res_1 = (unsigned short) c & 0xff00;
    6501                 :             :         int res = (int) res_1;
    6502                 :             : 
    6503                 :             :      where the intermediate result res_1 has unsigned rather than
    6504                 :             :      signed type.  */
    6505                 :     3051165 :   if (sign == SIGNED && !wi::neg_p (min_value))
    6506                 :             :     sign = UNSIGNED;
    6507                 :             : 
    6508                 :             :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    6509                 :     3051165 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    6510                 :     3051165 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    6511                 :     3051165 :   unsigned int value_precision = MAX (precision1, precision2);
    6512                 :     3051165 :   if (value_precision >= precision)
    6513                 :             :     return;
    6514                 :             : 
    6515                 :     1799366 :   if (dump_enabled_p ())
    6516                 :       77953 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6517                 :             :                      " without loss of precision: %G",
    6518                 :             :                      sign == SIGNED ? "signed" : "unsigned",
    6519                 :             :                      value_precision, (gimple *) stmt);
    6520                 :             : 
    6521                 :     1799366 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    6522                 :     1799366 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    6523                 :     9882957 : }
    6524                 :             : 
    6525                 :             : /* Use information about the users of STMT's result to decide whether
    6526                 :             :    STMT (described by STMT_INFO) could be done in a narrower type.
    6527                 :             :    This is effectively a backward propagation.  */
    6528                 :             : 
    6529                 :             : static void
    6530                 :    18691700 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    6531                 :             : {
    6532                 :    18691700 :   tree_code code = gimple_assign_rhs_code (stmt);
    6533                 :    18691700 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    6534                 :    18691700 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    6535                 :    18691700 :   if (!vect_narrowable_type_p (type))
    6536                 :    10737746 :     return;
    6537                 :             : 
    6538                 :    11620935 :   unsigned int precision = TYPE_PRECISION (type);
    6539                 :    11620935 :   unsigned int operation_precision, min_input_precision;
    6540                 :    11620935 :   switch (code)
    6541                 :             :     {
    6542                 :     2134827 :     CASE_CONVERT:
    6543                 :             :       /* Only the bits that contribute to the output matter.  Don't change
    6544                 :             :          the precision of the operation itself.  */
    6545                 :     2134827 :       operation_precision = precision;
    6546                 :     2134827 :       min_input_precision = stmt_info->min_output_precision;
    6547                 :     2134827 :       break;
    6548                 :             : 
    6549                 :      451914 :     case LSHIFT_EXPR:
    6550                 :      451914 :     case RSHIFT_EXPR:
    6551                 :      451914 :       {
    6552                 :      451914 :         tree shift = gimple_assign_rhs2 (stmt);
    6553                 :      451914 :         if (TREE_CODE (shift) != INTEGER_CST
    6554                 :      809510 :             || !wi::ltu_p (wi::to_widest (shift), precision))
    6555                 :       94354 :           return;
    6556                 :      357560 :         unsigned int const_shift = TREE_INT_CST_LOW (shift);
    6557                 :      357560 :         if (code == LSHIFT_EXPR)
    6558                 :             :           {
    6559                 :             :             /* Avoid creating an undefined shift.
    6560                 :             : 
    6561                 :             :                ??? We could instead use min_output_precision as-is and
    6562                 :             :                optimize out-of-range shifts to zero.  However, only
    6563                 :             :                degenerate testcases shift away all their useful input data,
    6564                 :             :                and it isn't natural to drop input operations in the middle
    6565                 :             :                of vectorization.  This sort of thing should really be
    6566                 :             :                handled before vectorization.  */
    6567                 :       92537 :             operation_precision = MAX (stmt_info->min_output_precision,
    6568                 :             :                                        const_shift + 1);
    6569                 :             :             /* We need CONST_SHIFT fewer bits of the input.  */
    6570                 :       92537 :             min_input_precision = (MAX (operation_precision, const_shift)
    6571                 :       92537 :                                    - const_shift);
    6572                 :             :           }
    6573                 :             :         else
    6574                 :             :           {
    6575                 :             :             /* We need CONST_SHIFT extra bits to do the operation.  */
    6576                 :      265023 :             operation_precision = (stmt_info->min_output_precision
    6577                 :             :                                    + const_shift);
    6578                 :      265023 :             min_input_precision = operation_precision;
    6579                 :             :           }
    6580                 :             :         break;
    6581                 :             :       }
    6582                 :             : 
    6583                 :     9034194 :     default:
    6584                 :     9034194 :       if (vect_truncatable_operation_p (code))
    6585                 :             :         {
    6586                 :             :           /* Input bit N has no effect on output bits N-1 and lower.  */
    6587                 :     5461567 :           operation_precision = stmt_info->min_output_precision;
    6588                 :     5461567 :           min_input_precision = operation_precision;
    6589                 :     5461567 :           break;
    6590                 :             :         }
    6591                 :             :       return;
    6592                 :             :     }
    6593                 :             : 
    6594                 :     7953954 :   if (operation_precision < precision)
    6595                 :             :     {
    6596                 :       93555 :       if (dump_enabled_p ())
    6597                 :        3327 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6598                 :             :                          " without affecting users: %G",
    6599                 :        3327 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    6600                 :             :                          operation_precision, (gimple *) stmt);
    6601                 :      187110 :       vect_set_operation_type (stmt_info, type, operation_precision,
    6602                 :       93555 :                                TYPE_SIGN (type));
    6603                 :             :     }
    6604                 :     7953954 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    6605                 :             : }
    6606                 :             : 
    6607                 :             : /* Return true if the statement described by STMT_INFO sets a boolean
    6608                 :             :    SSA_NAME and if we know how to vectorize this kind of statement using
    6609                 :             :    vector mask types.  */
    6610                 :             : 
    6611                 :             : static bool
    6612                 :    33978850 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    6613                 :             : {
    6614                 :    33978850 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6615                 :    33978850 :   tree_code code = ERROR_MARK;
    6616                 :    33978850 :   gassign *assign = NULL;
    6617                 :    33978850 :   gcond *cond = NULL;
    6618                 :             : 
    6619                 :    33978850 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    6620                 :    25127134 :     code = gimple_assign_rhs_code (assign);
    6621                 :    15287150 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    6622                 :             :     {
    6623                 :     4513539 :       lhs = gimple_cond_lhs (cond);
    6624                 :     4513539 :       code = gimple_cond_code (cond);
    6625                 :             :     }
    6626                 :             : 
    6627                 :    33978850 :   if (!lhs
    6628                 :    31201852 :       || TREE_CODE (lhs) != SSA_NAME
    6629                 :    60842768 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6630                 :             :     return false;
    6631                 :             : 
    6632                 :     1942559 :   if (code != ERROR_MARK)
    6633                 :             :     {
    6634                 :     1681084 :       switch (code)
    6635                 :             :         {
    6636                 :             :         CASE_CONVERT:
    6637                 :             :         case SSA_NAME:
    6638                 :             :         case BIT_NOT_EXPR:
    6639                 :             :         case BIT_IOR_EXPR:
    6640                 :             :         case BIT_XOR_EXPR:
    6641                 :             :         case BIT_AND_EXPR:
    6642                 :             :           return true;
    6643                 :             : 
    6644                 :     1306120 :         default:
    6645                 :     1306120 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    6646                 :             :         }
    6647                 :             :     }
    6648                 :      261475 :   else if (is_a <gphi *> (stmt_info->stmt))
    6649                 :      165193 :     return true;
    6650                 :             :   return false;
    6651                 :             : }
    6652                 :             : 
    6653                 :             : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    6654                 :             :    a vector mask type instead of a normal vector type.  Record the
    6655                 :             :    result in STMT_INFO->mask_precision.  */
    6656                 :             : 
    6657                 :             : static void
    6658                 :    33978850 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6659                 :             : {
    6660                 :    33978850 :   if (!possible_vector_mask_operation_p (stmt_info))
    6661                 :             :     return;
    6662                 :             : 
    6663                 :             :   /* If at least one boolean input uses a vector mask type,
    6664                 :             :      pick the mask type with the narrowest elements.
    6665                 :             : 
    6666                 :             :      ??? This is the traditional behavior.  It should always produce
    6667                 :             :      the smallest number of operations, but isn't necessarily the
    6668                 :             :      optimal choice.  For example, if we have:
    6669                 :             : 
    6670                 :             :        a = b & c
    6671                 :             : 
    6672                 :             :      where:
    6673                 :             : 
    6674                 :             :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    6675                 :             :        - b also uses M16
    6676                 :             :        - c uses a mask type for 8-bit elements (M8)
    6677                 :             : 
    6678                 :             :      then picking M8 gives:
    6679                 :             : 
    6680                 :             :        - 1 M16->M8 pack for b
    6681                 :             :        - 1 M8 AND for a
    6682                 :             :        - 2 M8->M16 unpacks for the user of a
    6683                 :             : 
    6684                 :             :      whereas picking M16 would have given:
    6685                 :             : 
    6686                 :             :        - 2 M8->M16 unpacks for c
    6687                 :             :        - 2 M16 ANDs for a
    6688                 :             : 
    6689                 :             :      The number of operations are equal, but M16 would have given
    6690                 :             :      a shorter dependency chain and allowed more ILP.  */
    6691                 :     1808401 :   unsigned int precision = ~0U;
    6692                 :     1808401 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    6693                 :             : 
    6694                 :             :   /* If the statement compares two values that shouldn't use vector masks,
    6695                 :             :      try comparing the values as normal scalars instead.  */
    6696                 :     1808401 :   tree_code code = ERROR_MARK;
    6697                 :     1808401 :   tree op0_type;
    6698                 :     1808401 :   unsigned int nops = -1;
    6699                 :     1808401 :   unsigned int ops_start = 0;
    6700                 :             : 
    6701                 :     1808401 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    6702                 :             :     {
    6703                 :     1139675 :       code = gimple_assign_rhs_code (assign);
    6704                 :     1139675 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    6705                 :     1139675 :       nops = gimple_num_ops (assign);
    6706                 :     1139675 :       ops_start = 1;
    6707                 :             :     }
    6708                 :      668726 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    6709                 :             :     {
    6710                 :      503533 :       code = gimple_cond_code (cond);
    6711                 :      503533 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    6712                 :      503533 :       nops = 2;
    6713                 :      503533 :       ops_start = 0;
    6714                 :             :     }
    6715                 :             : 
    6716                 :     1643208 :   if (code != ERROR_MARK)
    6717                 :             :     {
    6718                 :     4893040 :       for (unsigned int i = ops_start; i < nops; ++i)
    6719                 :             :         {
    6720                 :     3249832 :           tree rhs = gimple_op (stmt, i);
    6721                 :     3249832 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    6722                 :     1539264 :             continue;
    6723                 :             : 
    6724                 :     1710568 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6725                 :     1710568 :           if (!def_stmt_info)
    6726                 :             :             /* Don't let external or constant operands influence the choice.
    6727                 :             :                We can convert them to whichever vector type we pick.  */
    6728                 :      514996 :             continue;
    6729                 :             : 
    6730                 :     1195572 :           if (def_stmt_info->mask_precision)
    6731                 :             :             {
    6732                 :     1011349 :               if (precision > def_stmt_info->mask_precision)
    6733                 :     3249832 :                 precision = def_stmt_info->mask_precision;
    6734                 :             :             }
    6735                 :             :         }
    6736                 :             : 
    6737                 :     1643208 :       if (precision == ~0U
    6738                 :     1317046 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    6739                 :             :         {
    6740                 :     1127443 :           scalar_mode mode;
    6741                 :     1127443 :           tree vectype, mask_type;
    6742                 :     1127443 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    6743                 :     1127443 :               && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
    6744                 :      971553 :               && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
    6745                 :      971553 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    6746                 :     1489432 :             precision = GET_MODE_BITSIZE (mode);
    6747                 :             :         }
    6748                 :             :     }
    6749                 :             :   else
    6750                 :             :     {
    6751                 :      165193 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    6752                 :      603140 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    6753                 :             :         {
    6754                 :      437947 :           tree rhs = gimple_phi_arg_def (phi, i);
    6755                 :             : 
    6756                 :      437947 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6757                 :      437947 :           if (!def_stmt_info)
    6758                 :             :             /* Don't let external or constant operands influence the choice.
    6759                 :             :                We can convert them to whichever vector type we pick.  */
    6760                 :      258100 :             continue;
    6761                 :             : 
    6762                 :      179847 :           if (def_stmt_info->mask_precision)
    6763                 :             :             {
    6764                 :      155874 :               if (precision > def_stmt_info->mask_precision)
    6765                 :      437947 :                 precision = def_stmt_info->mask_precision;
    6766                 :             :             }
    6767                 :             :         }
    6768                 :             :     }
    6769                 :             : 
    6770                 :     1808401 :   if (dump_enabled_p ())
    6771                 :             :     {
    6772                 :        6908 :       if (precision == ~0U)
    6773                 :        1667 :         dump_printf_loc (MSG_NOTE, vect_location,
    6774                 :             :                          "using normal nonmask vectors for %G",
    6775                 :             :                          stmt_info->stmt);
    6776                 :             :       else
    6777                 :        5241 :         dump_printf_loc (MSG_NOTE, vect_location,
    6778                 :             :                          "using boolean precision %d for %G",
    6779                 :             :                          precision, stmt_info->stmt);
    6780                 :             :     }
    6781                 :             : 
    6782                 :     1808401 :   stmt_info->mask_precision = precision;
    6783                 :             : }
    6784                 :             : 
    6785                 :             : /* Handle vect_determine_precisions for STMT_INFO, given that we
    6786                 :             :    have already done so for the users of its result.  */
    6787                 :             : 
    6788                 :             : void
    6789                 :    33978850 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    6790                 :             : {
    6791                 :    33978850 :   vect_determine_min_output_precision (vinfo, stmt_info);
    6792                 :    33978850 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    6793                 :             :     {
    6794                 :    18691700 :       vect_determine_precisions_from_range (stmt_info, stmt);
    6795                 :    18691700 :       vect_determine_precisions_from_users (stmt_info, stmt);
    6796                 :             :     }
    6797                 :    33978850 : }
    6798                 :             : 
    6799                 :             : /* Walk backwards through the vectorizable region to determine the
    6800                 :             :    values of these fields:
    6801                 :             : 
    6802                 :             :    - min_output_precision
    6803                 :             :    - min_input_precision
    6804                 :             :    - operation_precision
    6805                 :             :    - operation_sign.  */
    6806                 :             : 
    6807                 :             : void
    6808                 :      892880 : vect_determine_precisions (vec_info *vinfo)
    6809                 :             : {
    6810                 :      892880 :   basic_block *bbs = vinfo->bbs;
    6811                 :      892880 :   unsigned int nbbs = vinfo->nbbs;
    6812                 :             : 
    6813                 :      892880 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    6814                 :             : 
    6815                 :    11432570 :   for (unsigned int i = 0; i < nbbs; i++)
    6816                 :             :     {
    6817                 :    10539690 :       basic_block bb = bbs[i];
    6818                 :    17298973 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6819                 :             :         {
    6820                 :     6759283 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6821                 :     6759283 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6822                 :     6581601 :             vect_determine_mask_precision (vinfo, stmt_info);
    6823                 :             :         }
    6824                 :    98247556 :       for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6825                 :             :         {
    6826                 :    77168176 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6827                 :    77168176 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6828                 :    27397249 :             vect_determine_mask_precision (vinfo, stmt_info);
    6829                 :             :         }
    6830                 :             :     }
    6831                 :    11432570 :   for (unsigned int i = 0; i < nbbs; i++)
    6832                 :             :     {
    6833                 :    10539690 :       basic_block bb = bbs[nbbs - i - 1];
    6834                 :   175415732 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    6835                 :             :         {
    6836                 :    77168176 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6837                 :    77168176 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6838                 :    27397249 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    6839                 :             :         }
    6840                 :    17298973 :       for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6841                 :             :         {
    6842                 :     6759283 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6843                 :     6759283 :           if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6844                 :     6581601 :             vect_determine_stmt_precisions (vinfo, stmt_info);
    6845                 :             :         }
    6846                 :             :     }
    6847                 :      892880 : }
    6848                 :             : 
    6849                 :             : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    6850                 :             : 
    6851                 :             : struct vect_recog_func
    6852                 :             : {
    6853                 :             :   vect_recog_func_ptr fn;
    6854                 :             :   const char *name;
    6855                 :             : };
    6856                 :             : 
    6857                 :             : /* Note that ordering matters - the first pattern matching on a stmt is
    6858                 :             :    taken which means usually the more complex one needs to precede the
    6859                 :             :    less complex ones (widen_sum only after dot_prod or sad for example).  */
    6860                 :             : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    6861                 :             :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    6862                 :             :   { vect_recog_bit_insert_pattern, "bit_insert" },
    6863                 :             :   { vect_recog_abd_pattern, "abd" },
    6864                 :             :   { vect_recog_over_widening_pattern, "over_widening" },
    6865                 :             :   /* Must come after over_widening, which narrows the shift as much as
    6866                 :             :      possible beforehand.  */
    6867                 :             :   { vect_recog_average_pattern, "average" },
    6868                 :             :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    6869                 :             :   { vect_recog_mulhs_pattern, "mult_high" },
    6870                 :             :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    6871                 :             :   { vect_recog_widen_mult_pattern, "widen_mult" },
    6872                 :             :   { vect_recog_dot_prod_pattern, "dot_prod" },
    6873                 :             :   { vect_recog_sad_pattern, "sad" },
    6874                 :             :   { vect_recog_widen_sum_pattern, "widen_sum" },
    6875                 :             :   { vect_recog_pow_pattern, "pow" },
    6876                 :             :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    6877                 :             :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    6878                 :             :   { vect_recog_widen_shift_pattern, "widen_shift" },
    6879                 :             :   { vect_recog_rotate_pattern, "rotate" },
    6880                 :             :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    6881                 :             :   { vect_recog_divmod_pattern, "divmod" },
    6882                 :             :   { vect_recog_mod_var_pattern, "modvar" },
    6883                 :             :   { vect_recog_mult_pattern, "mult" },
    6884                 :             :   { vect_recog_sat_add_pattern, "sat_add" },
    6885                 :             :   { vect_recog_sat_sub_pattern, "sat_sub" },
    6886                 :             :   { vect_recog_sat_trunc_pattern, "sat_trunc" },
    6887                 :             :   { vect_recog_gcond_pattern, "gcond" },
    6888                 :             :   { vect_recog_bool_pattern, "bool" },
    6889                 :             :   /* This must come before mask conversion, and includes the parts
    6890                 :             :      of mask conversion that are needed for gather and scatter
    6891                 :             :      internal functions.  */
    6892                 :             :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    6893                 :             :   { vect_recog_cond_store_pattern, "cond_store" },
    6894                 :             :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    6895                 :             :   { vect_recog_widen_plus_pattern, "widen_plus" },
    6896                 :             :   { vect_recog_widen_minus_pattern, "widen_minus" },
    6897                 :             :   { vect_recog_widen_abd_pattern, "widen_abd" },
    6898                 :             :   /* These must come after the double widening ones.  */
    6899                 :             : };
    6900                 :             : 
    6901                 :             : /* Mark statements that are involved in a pattern.  */
    6902                 :             : 
    6903                 :             : void
    6904                 :      714618 : vect_mark_pattern_stmts (vec_info *vinfo,
    6905                 :             :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    6906                 :             :                          tree pattern_vectype)
    6907                 :             : {
    6908                 :      714618 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    6909                 :      714618 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    6910                 :             : 
    6911                 :      714618 :   gimple *orig_pattern_stmt = NULL;
    6912                 :      714618 :   if (is_pattern_stmt_p (orig_stmt_info))
    6913                 :             :     {
    6914                 :             :       /* We're replacing a statement in an existing pattern definition
    6915                 :             :          sequence.  */
    6916                 :       11142 :       orig_pattern_stmt = orig_stmt_info->stmt;
    6917                 :       11142 :       if (dump_enabled_p ())
    6918                 :         603 :         dump_printf_loc (MSG_NOTE, vect_location,
    6919                 :             :                          "replacing earlier pattern %G", orig_pattern_stmt);
    6920                 :             : 
    6921                 :             :       /* To keep the book-keeping simple, just swap the lhs of the
    6922                 :             :          old and new statements, so that the old one has a valid but
    6923                 :             :          unused lhs.  */
    6924                 :       11142 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    6925                 :       11142 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    6926                 :       11142 :       gimple_set_lhs (pattern_stmt, old_lhs);
    6927                 :             : 
    6928                 :       11142 :       if (dump_enabled_p ())
    6929                 :         603 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    6930                 :             : 
    6931                 :             :       /* Switch to the statement that ORIG replaces.  */
    6932                 :       11142 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    6933                 :             : 
    6934                 :             :       /* We shouldn't be replacing the main pattern statement.  */
    6935                 :       11142 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    6936                 :             :                   != orig_pattern_stmt);
    6937                 :             :     }
    6938                 :             : 
    6939                 :      714618 :   if (def_seq)
    6940                 :             :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    6941                 :     1552086 :          !gsi_end_p (si); gsi_next (&si))
    6942                 :             :       {
    6943                 :      938314 :         if (dump_enabled_p ())
    6944                 :       21988 :           dump_printf_loc (MSG_NOTE, vect_location,
    6945                 :             :                            "extra pattern stmt: %G", gsi_stmt (si));
    6946                 :      938314 :         stmt_vec_info pattern_stmt_info
    6947                 :      938314 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    6948                 :             :                                     orig_stmt_info, pattern_vectype);
    6949                 :             :         /* Stmts in the def sequence are not vectorizable cycle or
    6950                 :             :            induction defs, instead they should all be vect_internal_def
    6951                 :             :            feeding the main pattern stmt which retains this def type.  */
    6952                 :      938314 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    6953                 :             :       }
    6954                 :             : 
    6955                 :      714618 :   if (orig_pattern_stmt)
    6956                 :             :     {
    6957                 :       11142 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    6958                 :             :                               orig_stmt_info, pattern_vectype);
    6959                 :             : 
    6960                 :             :       /* Insert all the new pattern statements before the original one.  */
    6961                 :       11142 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    6962                 :       11142 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    6963                 :             :                                                orig_def_seq);
    6964                 :       11142 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    6965                 :       11142 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    6966                 :             : 
    6967                 :             :       /* Remove the pattern statement that this new pattern replaces.  */
    6968                 :       11142 :       gsi_remove (&gsi, false);
    6969                 :             :     }
    6970                 :             :   else
    6971                 :      703476 :     vect_set_pattern_stmt (vinfo,
    6972                 :             :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    6973                 :             : 
    6974                 :             :   /* For any conditionals mark them as vect_condition_def.  */
    6975                 :      714618 :   if (is_a <gcond *> (pattern_stmt))
    6976                 :      240970 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    6977                 :             : 
    6978                 :             :   /* Transfer reduction path info to the pattern.  */
    6979                 :      714618 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    6980                 :             :     {
    6981                 :        8259 :       gimple_match_op op;
    6982                 :        8259 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    6983                 :           0 :         gcc_unreachable ();
    6984                 :        8259 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    6985                 :             :       /* Search the pattern def sequence and the main pattern stmt.  Note
    6986                 :             :          we may have inserted all into a containing pattern def sequence
    6987                 :             :          so the following is a bit awkward.  */
    6988                 :        8259 :       gimple_stmt_iterator si;
    6989                 :        8259 :       gimple *s;
    6990                 :        8259 :       if (def_seq)
    6991                 :             :         {
    6992                 :        7599 :           si = gsi_start (def_seq);
    6993                 :        7599 :           s = gsi_stmt (si);
    6994                 :        7599 :           gsi_next (&si);
    6995                 :             :         }
    6996                 :             :       else
    6997                 :             :         {
    6998                 :             :           si = gsi_none ();
    6999                 :             :           s = pattern_stmt;
    7000                 :             :         }
    7001                 :       15973 :       do
    7002                 :             :         {
    7003                 :       15973 :           bool found = false;
    7004                 :       15973 :           if (gimple_extract_op (s, &op))
    7005                 :       38240 :             for (unsigned i = 0; i < op.num_ops; ++i)
    7006                 :       30688 :               if (op.ops[i] == lookfor)
    7007                 :             :                 {
    7008                 :        8421 :                   STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7009                 :        8421 :                   lookfor = gimple_get_lhs (s);
    7010                 :        8421 :                   found = true;
    7011                 :        8421 :                   break;
    7012                 :             :                 }
    7013                 :       15973 :           if (s == pattern_stmt)
    7014                 :             :             {
    7015                 :        8259 :               if (!found && dump_enabled_p ())
    7016                 :           0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7017                 :             :                                  "failed to update reduction index.\n");
    7018                 :        8259 :               break;
    7019                 :             :             }
    7020                 :        7714 :           if (gsi_end_p (si))
    7021                 :             :             s = pattern_stmt;
    7022                 :             :           else
    7023                 :             :             {
    7024                 :         115 :               s = gsi_stmt (si);
    7025                 :         115 :               if (s == pattern_stmt)
    7026                 :             :                 /* Found the end inside a bigger pattern def seq.  */
    7027                 :             :                 si = gsi_none ();
    7028                 :             :               else
    7029                 :         115 :                 gsi_next (&si);
    7030                 :             :             }
    7031                 :             :         } while (1);
    7032                 :             :     }
    7033                 :      714618 : }
    7034                 :             : 
    7035                 :             : /* Function vect_pattern_recog_1
    7036                 :             : 
    7037                 :             :    Input:
    7038                 :             :    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
    7039                 :             :         computation pattern.
    7040                 :             :    STMT_INFO: A stmt from which the pattern search should start.
    7041                 :             : 
    7042                 :             :    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
    7043                 :             :    a sequence of statements that has the same functionality and can be
    7044                 :             :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7045                 :             :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7046                 :             :    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
    7047                 :             :    statement, having first checked that the target supports the new operation
    7048                 :             :    in that type.
    7049                 :             : 
    7050                 :             :    This function also does some bookkeeping, as explained in the documentation
    7051                 :             :    for vect_pattern_recog.  */
    7052                 :             : 
    7053                 :             : static void
    7054                 :   890552645 : vect_pattern_recog_1 (vec_info *vinfo,
    7055                 :             :                       const vect_recog_func &recog_func, stmt_vec_info stmt_info)
    7056                 :             : {
    7057                 :   890552645 :   gimple *pattern_stmt;
    7058                 :   890552645 :   tree pattern_vectype;
    7059                 :             : 
    7060                 :             :   /* If this statement has already been replaced with pattern statements,
    7061                 :             :      leave the original statement alone, since the first match wins.
    7062                 :             :      Instead try to match against the definition statements that feed
    7063                 :             :      the main pattern statement.  */
    7064                 :   890552645 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7065                 :             :     {
    7066                 :     9793607 :       gimple_stmt_iterator gsi;
    7067                 :     9793607 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7068                 :    23634284 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7069                 :    13840677 :         vect_pattern_recog_1 (vinfo, recog_func,
    7070                 :             :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7071                 :             :       return;
    7072                 :             :     }
    7073                 :             : 
    7074                 :   880759038 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7075                 :   880759038 :   pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
    7076                 :   880759038 :   if (!pattern_stmt)
    7077                 :             :     {
    7078                 :             :       /* Clear any half-formed pattern definition sequence.  */
    7079                 :   880044420 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7080                 :   880044420 :       return;
    7081                 :             :     }
    7082                 :             : 
    7083                 :             :   /* Found a vectorizable pattern.  */
    7084                 :      714618 :   if (dump_enabled_p ())
    7085                 :       17659 :     dump_printf_loc (MSG_NOTE, vect_location,
    7086                 :             :                      "%s pattern recognized: %G",
    7087                 :       17659 :                      recog_func.name, pattern_stmt);
    7088                 :             : 
    7089                 :             :   /* Mark the stmts that are involved in the pattern. */
    7090                 :      714618 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7091                 :             : }
    7092                 :             : 
    7093                 :             : 
    7094                 :             : /* Function vect_pattern_recog
    7095                 :             : 
    7096                 :             :    Input:
    7097                 :             :    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
    7098                 :             :         computation idioms.
    7099                 :             : 
    7100                 :             :    Output - for each computation idiom that is detected we create a new stmt
    7101                 :             :         that provides the same functionality and that can be vectorized.  We
    7102                 :             :         also record some information in the struct_stmt_info of the relevant
    7103                 :             :         stmts, as explained below:
    7104                 :             : 
    7105                 :             :    At the entry to this function we have the following stmts, with the
    7106                 :             :    following initial value in the STMT_VINFO fields:
    7107                 :             : 
    7108                 :             :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7109                 :             :          S1: a_i = ....                 -       -               -
    7110                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7111                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7112                 :             :          S4: a_0 = ..use(a_1)..         -       -               -
    7113                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7114                 :             : 
    7115                 :             :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7116                 :             :    represented by a single stmt.  We then:
    7117                 :             :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7118                 :             :      inserted into the code)
    7119                 :             :    - fill in the STMT_VINFO fields as follows:
    7120                 :             : 
    7121                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7122                 :             :          S1: a_i = ....                 -       -               -
    7123                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7124                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7125                 :             :          S4: a_0 = ..use(a_1)..         true    S6              -
    7126                 :             :           '---> S6: a_new = ....        -       S4              -
    7127                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7128                 :             : 
    7129                 :             :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7130                 :             :    to each other through the RELATED_STMT field).
    7131                 :             : 
    7132                 :             :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7133                 :             :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7134                 :             :    remain irrelevant unless used by stmts other than S4.
    7135                 :             : 
    7136                 :             :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7137                 :             :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7138                 :             :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7139                 :             :    S4 will be skipped, and S5 will be vectorized as usual:
    7140                 :             : 
    7141                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7142                 :             :          S1: a_i = ....                 -       -               -
    7143                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7144                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7145                 :             :        > VS6: va_new = ....             -       -               -
    7146                 :             :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7147                 :             :           '---> S6: a_new = ....        -       S4              VS6
    7148                 :             :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7149                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7150                 :             : 
    7151                 :             :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7152                 :             :    elsewhere), and we'll end up with:
    7153                 :             : 
    7154                 :             :         VS6: va_new = ....
    7155                 :             :         VS5: ... = ..vuse(va_new)..
    7156                 :             : 
    7157                 :             :    In case of more than one pattern statement, e.g., widen-mult with
    7158                 :             :    intermediate type:
    7159                 :             : 
    7160                 :             :      S1  a_t = ;
    7161                 :             :      S2  a_T = (TYPE) a_t;
    7162                 :             :            '--> S3: a_it = (interm_type) a_t;
    7163                 :             :      S4  prod_T = a_T * CONST;
    7164                 :             :            '--> S5: prod_T' = a_it w* CONST;
    7165                 :             : 
    7166                 :             :    there may be other users of a_T outside the pattern.  In that case S2 will
    7167                 :             :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7168                 :             :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7169                 :             :    be recorded in S3.  */
    7170                 :             : 
    7171                 :             : void
    7172                 :      892880 : vect_pattern_recog (vec_info *vinfo)
    7173                 :             : {
    7174                 :      892880 :   basic_block *bbs = vinfo->bbs;
    7175                 :      892880 :   unsigned int nbbs = vinfo->nbbs;
    7176                 :             : 
    7177                 :      892880 :   vect_determine_precisions (vinfo);  /* Runs once, before any recognizer is tried.  */
    7178                 :             : 
    7179                 :      892880 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7180                 :             : 
    7181                 :             :   /* Scan through the stmts in the region, applying the pattern recognition
    7182                 :             :      functions starting at each stmt visited.  The region is the NBBS basic
                         :             :      blocks in BBS, both taken from VINFO; blocks are visited in array
                         :             :      order and the stmts of each block from gsi_start_bb onwards.  */
    7183                 :    11432570 :   for (unsigned i = 0; i < nbbs; i++)
    7184                 :             :     {
    7185                 :    10539690 :       basic_block bb = bbs[i];
    7186                 :             : 
    7187                 :    98247556 :       for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7188                 :             :         {
    7189                 :    77168176 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7190                 :             : 
    7191                 :    77168176 :           if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))  /* No info for this stmt, or STMT_VINFO_VECTORIZABLE is false.  */
    7192                 :    49770927 :             continue;
    7193                 :             : 
    7194                 :             :           /* Scan over all generic vect_recog_xxx_pattern functions.  Every
                         :             :              entry of vect_vect_recog_func_ptrs is handed to
                         :             :              vect_pattern_recog_1 for this stmt, in table order; the loop
                         :             :              has no early exit.  */
    7195                 :   904109217 :           for (const auto &func_ptr : vect_vect_recog_func_ptrs)
    7196                 :   876711968 :             vect_pattern_recog_1 (vinfo, func_ptr,
    7197                 :             :                                   stmt_info);
    7198                 :             :         }
    7199                 :             :     }
    7200                 :             : 
    7201                 :             :   /* After this no more add_stmt calls are allowed.  */
    7202                 :      892880 :   vinfo->stmt_vec_info_ro = true;
    7203                 :      892880 : }
    7204                 :             : 
    7205                 :             : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
    7206                 :             :    or internal_fn contained in ch, respectively.
                         :             :
                         :             :    LHS is used as the left-hand side of the new statement.  OP0 must not
                         :             :    be NULL_TREE (asserted).  When CH is a tree code, OP0 and OP1 are
                         :             :    passed to gimple_build_assign unchanged.  Otherwise CH must be an
                         :             :    internal function (asserted) and a call is built with one argument
                         :             :    (OP0) if OP1 is NULL_TREE, or with two arguments (OP0, OP1) if it is
                         :             :    not.  Returns the new statement.  */
    7207                 :             : gimple *
    7208                 :      127822 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7209                 :             : {
    7210                 :      127822 :   gcc_assert (op0 != NULL_TREE);
    7211                 :      127822 :   if (ch.is_tree_code ())
    7212                 :      127822 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7213                 :             : 
    7214                 :           0 :   gcc_assert (ch.is_internal_fn ());
    7215                 :           0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7216                 :             :                                              op1 == NULL_TREE ? 1 : 2,  /* Argument count.  */
    7217                 :             :                                              op0, op1);
    7218                 :           0 :   gimple_call_set_lhs (stmt, lhs);  /* The call is built without an LHS; attach it here.  */
    7219                 :           0 :   return stmt;
    7220                 :             : }
        

Generated by: LCOV version 2.1-beta

LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.