LCOV - code coverage report
Current view: top level - gcc - tree-vect-patterns.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 85.0 % 2952 2510
Test Date: 2024-03-23 14:05:01 Functions: 88.0 % 83 73
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* Analysis Utilities for Loop Vectorization.
       2                 :             :    Copyright (C) 2006-2024 Free Software Foundation, Inc.
       3                 :             :    Contributed by Dorit Nuzman <dorit@il.ibm.com>
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.  */
      20                 :             : 
      21                 :             : #include "config.h"
      22                 :             : #include "system.h"
      23                 :             : #include "coretypes.h"
      24                 :             : #include "backend.h"
      25                 :             : #include "rtl.h"
      26                 :             : #include "tree.h"
      27                 :             : #include "gimple.h"
      28                 :             : #include "gimple-iterator.h"
      29                 :             : #include "gimple-fold.h"
      30                 :             : #include "ssa.h"
      31                 :             : #include "expmed.h"
      32                 :             : #include "optabs-tree.h"
      33                 :             : #include "insn-config.h"
      34                 :             : #include "recog.h"            /* FIXME: for insn_data */
      35                 :             : #include "fold-const.h"
      36                 :             : #include "stor-layout.h"
      37                 :             : #include "tree-eh.h"
      38                 :             : #include "gimplify.h"
      39                 :             : #include "gimple-iterator.h"
      40                 :             : #include "gimple-fold.h"
      41                 :             : #include "gimplify-me.h"
      42                 :             : #include "cfgloop.h"
      43                 :             : #include "tree-vectorizer.h"
      44                 :             : #include "dumpfile.h"
      45                 :             : #include "builtins.h"
      46                 :             : #include "internal-fn.h"
      47                 :             : #include "case-cfn-macros.h"
      48                 :             : #include "fold-const-call.h"
      49                 :             : #include "attribs.h"
      50                 :             : #include "cgraph.h"
      51                 :             : #include "omp-simd-clone.h"
      52                 :             : #include "predict.h"
      53                 :             : #include "tree-vector-builder.h"
      54                 :             : #include "vec-perm-indices.h"
      55                 :             : #include "gimple-range.h"
      56                 :             : 
      57                 :             : 
      58                 :             : /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
      59                 :             :    in the first operand.  Disentangling this is future work, the
      60                 :             :    IL is properly transfered to VEC_COND_EXPRs with separate compares.  */
      61                 :             : 
      62                 :             : 
      63                 :             : /* Return true if we have a useful VR_RANGE range for VAR, storing it
      64                 :             :    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
      65                 :             : 
      66                 :             : bool
      67                 :     9391273 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
      68                 :             : {
      69                 :     9391273 :   value_range vr;
      70                 :     9391273 :   tree vr_min, vr_max;
      71                 :    18782546 :   get_range_query (cfun)->range_of_expr (vr, var);
      72                 :     9391273 :   if (vr.undefined_p ())
      73                 :          33 :     vr.set_varying (TREE_TYPE (var));
      74                 :     9391273 :   value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
      75                 :     9391273 :   *min_value = wi::to_wide (vr_min);
      76                 :     9391273 :   *max_value = wi::to_wide (vr_max);
      77                 :     9391273 :   wide_int nonzero = get_nonzero_bits (var);
      78                 :     9391273 :   signop sgn = TYPE_SIGN (TREE_TYPE (var));
      79                 :     9391273 :   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
      80                 :             :                                          nonzero, sgn) == VR_RANGE)
      81                 :             :     {
      82                 :     4118932 :       if (dump_enabled_p ())
      83                 :             :         {
      84                 :       66994 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      85                 :       66994 :           dump_printf (MSG_NOTE, " has range [");
      86                 :       66994 :           dump_hex (MSG_NOTE, *min_value);
      87                 :       66994 :           dump_printf (MSG_NOTE, ", ");
      88                 :       66994 :           dump_hex (MSG_NOTE, *max_value);
      89                 :       66994 :           dump_printf (MSG_NOTE, "]\n");
      90                 :             :         }
      91                 :     4118932 :       return true;
      92                 :             :     }
      93                 :             :   else
      94                 :             :     {
      95                 :     5272341 :       if (dump_enabled_p ())
      96                 :             :         {
      97                 :       82766 :           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      98                 :       82766 :           dump_printf (MSG_NOTE, " has no range info\n");
      99                 :             :         }
     100                 :     5272341 :       return false;
     101                 :             :     }
     102                 :     9391273 : }
     103                 :             : 
     104                 :             : /* Report that we've found an instance of pattern PATTERN in
     105                 :             :    statement STMT.  */
     106                 :             : 
     107                 :             : static void
     108                 :      804806 : vect_pattern_detected (const char *name, gimple *stmt)
     109                 :             : {
     110                 :      804806 :   if (dump_enabled_p ())
     111                 :       23884 :     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
     112                 :      804806 : }
     113                 :             : 
     114                 :             : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
     115                 :             :    return the pattern statement's stmt_vec_info.  Set its vector type to
     116                 :             :    VECTYPE if it doesn't have one already.  */
     117                 :             : 
     118                 :             : static stmt_vec_info
     119                 :     1437397 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     120                 :             :                         stmt_vec_info orig_stmt_info, tree vectype)
     121                 :             : {
     122                 :     1437397 :   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
     123                 :     1437397 :   if (pattern_stmt_info == NULL)
     124                 :      894474 :     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
     125                 :     1437397 :   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
     126                 :             : 
     127                 :     1437397 :   pattern_stmt_info->pattern_stmt_p = true;
     128                 :     1437397 :   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
     129                 :     1437397 :   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
     130                 :     1437397 :     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
     131                 :     1437397 :   STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
     132                 :     1437397 :   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
     133                 :             :     {
     134                 :     1592656 :       gcc_assert (!vectype
     135                 :             :                   || is_a <gcond *> (pattern_stmt)
     136                 :             :                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
     137                 :             :                       == vect_use_mask_type_p (orig_stmt_info)));
     138                 :      903030 :       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
     139                 :      903030 :       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
     140                 :             :     }
     141                 :     1437397 :   return pattern_stmt_info;
     142                 :             : }
     143                 :             : 
     144                 :             : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
     145                 :             :    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
     146                 :             :    have one already.  */
     147                 :             : 
     148                 :             : static void
     149                 :      637057 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
     150                 :             :                        stmt_vec_info orig_stmt_info, tree vectype)
     151                 :             : {
     152                 :      637057 :   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
     153                 :      637057 :   STMT_VINFO_RELATED_STMT (orig_stmt_info)
     154                 :           0 :     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
     155                 :      609711 : }
     156                 :             : 
     157                 :             : /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
     158                 :             :    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
     159                 :             :    be different from the vector type of the final pattern statement.
     160                 :             :    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
     161                 :             :    from which it was derived.  */
     162                 :             : 
     163                 :             : static inline void
     164                 :      764700 : append_pattern_def_seq (vec_info *vinfo,
     165                 :             :                         stmt_vec_info stmt_info, gimple *new_stmt,
     166                 :             :                         tree vectype = NULL_TREE,
     167                 :             :                         tree scalar_type_for_mask = NULL_TREE)
     168                 :             : {
     169                 :     1298962 :   gcc_assert (!scalar_type_for_mask
     170                 :             :               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
     171                 :      764700 :   if (vectype)
     172                 :             :     {
     173                 :      534367 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
     174                 :      534367 :       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
     175                 :      534367 :       if (scalar_type_for_mask)
     176                 :      230438 :         new_stmt_info->mask_precision
     177                 :      460876 :           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
     178                 :             :     }
     179                 :      764700 :   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
     180                 :             :                                       new_stmt);
     181                 :      764700 : }
     182                 :             : 
     183                 :             : /* The caller wants to perform new operations on vect_external variable
     184                 :             :    VAR, so that the result of the operations would also be vect_external.
     185                 :             :    Return the edge on which the operations can be performed, if one exists.
     186                 :             :    Return null if the operations should instead be treated as part of
     187                 :             :    the pattern that needs them.  */
     188                 :             : 
     189                 :             : static edge
     190                 :        5089 : vect_get_external_def_edge (vec_info *vinfo, tree var)
     191                 :             : {
     192                 :        5089 :   edge e = NULL;
     193                 :        5089 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     194                 :             :     {
     195                 :         494 :       e = loop_preheader_edge (loop_vinfo->loop);
     196                 :         494 :       if (!SSA_NAME_IS_DEFAULT_DEF (var))
     197                 :             :         {
     198                 :         417 :           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
     199                 :         417 :           if (bb == NULL
     200                 :         417 :               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
     201                 :             :             e = NULL;
     202                 :             :         }
     203                 :             :     }
     204                 :        5089 :   return e;
     205                 :             : }
     206                 :             : 
     207                 :             : /* Return true if the target supports a vector version of CODE,
     208                 :             :    where CODE is known to map to a direct optab with the given SUBTYPE.
     209                 :             :    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
     210                 :             :    specifies the type of the scalar result.
     211                 :             : 
     212                 :             :    If CODE allows the inputs and outputs to have different type
     213                 :             :    (such as for WIDEN_SUM_EXPR), it is the input mode rather
     214                 :             :    than the output mode that determines the appropriate target pattern.
     215                 :             :    Operand 0 of the target pattern then specifies the mode that the output
     216                 :             :    must have.
     217                 :             : 
     218                 :             :    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
     219                 :             :    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
     220                 :             :    is nonnull.  */
     221                 :             : 
     222                 :             : static bool
     223                 :        2501 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
     224                 :             :                                  tree itype, tree *vecotype_out,
     225                 :             :                                  tree *vecitype_out = NULL,
     226                 :             :                                  enum optab_subtype subtype = optab_default)
     227                 :             : {
     228                 :        2501 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
     229                 :        2501 :   if (!vecitype)
     230                 :             :     return false;
     231                 :             : 
     232                 :        2501 :   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
     233                 :        2501 :   if (!vecotype)
     234                 :             :     return false;
     235                 :             : 
     236                 :        2297 :   optab optab = optab_for_tree_code (code, vecitype, subtype);
     237                 :        2297 :   if (!optab)
     238                 :             :     return false;
     239                 :             : 
     240                 :        2297 :   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
     241                 :        2297 :   if (icode == CODE_FOR_nothing
     242                 :        2297 :       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
     243                 :        1914 :     return false;
     244                 :             : 
     245                 :         383 :   *vecotype_out = vecotype;
     246                 :         383 :   if (vecitype_out)
     247                 :         383 :     *vecitype_out = vecitype;
     248                 :             :   return true;
     249                 :             : }
     250                 :             : 
     251                 :             : /* Round bit precision PRECISION up to a full element.  */
     252                 :             : 
     253                 :             : static unsigned int
     254                 :     3071298 : vect_element_precision (unsigned int precision)
     255                 :             : {
     256                 :           0 :   precision = 1 << ceil_log2 (precision);
     257                 :     3071298 :   return MAX (precision, BITS_PER_UNIT);
     258                 :             : }
     259                 :             : 
     260                 :             : /* If OP is defined by a statement that's being considered for vectorization,
     261                 :             :    return information about that statement, otherwise return NULL.  */
     262                 :             : 
     263                 :             : static stmt_vec_info
     264                 :     1267854 : vect_get_internal_def (vec_info *vinfo, tree op)
     265                 :             : {
     266                 :     1267854 :   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
     267                 :     1267854 :   if (def_stmt_info
     268                 :     1241223 :       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
     269                 :     1232450 :     return def_stmt_info;
     270                 :             :   return NULL;
     271                 :             : }
     272                 :             : 
     273                 :             : /* Check whether NAME, an ssa-name used in STMT_VINFO,
     274                 :             :    is a result of a type promotion, such that:
     275                 :             :      DEF_STMT: NAME = NOP (name0)
     276                 :             :    If CHECK_SIGN is TRUE, check that either both types are signed or both are
     277                 :             :    unsigned.  */
     278                 :             : 
     279                 :             : static bool
     280                 :           0 : type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
     281                 :             :                    tree *orig_type, gimple **def_stmt, bool *promotion)
     282                 :             : {
     283                 :           0 :   tree type = TREE_TYPE (name);
     284                 :           0 :   tree oprnd0;
     285                 :           0 :   enum vect_def_type dt;
     286                 :             : 
     287                 :           0 :   stmt_vec_info def_stmt_info;
     288                 :           0 :   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
     289                 :             :     return false;
     290                 :             : 
     291                 :           0 :   if (dt != vect_internal_def
     292                 :           0 :       && dt != vect_external_def && dt != vect_constant_def)
     293                 :             :     return false;
     294                 :             : 
     295                 :           0 :   if (!*def_stmt)
     296                 :             :     return false;
     297                 :             : 
     298                 :           0 :   if (!is_gimple_assign (*def_stmt))
     299                 :             :     return false;
     300                 :             : 
     301                 :           0 :   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
     302                 :             :     return false;
     303                 :             : 
     304                 :           0 :   oprnd0 = gimple_assign_rhs1 (*def_stmt);
     305                 :             : 
     306                 :           0 :   *orig_type = TREE_TYPE (oprnd0);
     307                 :           0 :   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
     308                 :           0 :       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
     309                 :             :     return false;
     310                 :             : 
     311                 :           0 :   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
     312                 :           0 :     *promotion = true;
     313                 :             :   else
     314                 :           0 :     *promotion = false;
     315                 :             : 
     316                 :           0 :   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
     317                 :             :     return false;
     318                 :             : 
     319                 :             :   return true;
     320                 :             : }
     321                 :             : 
     322                 :             : /* Holds information about an input operand after some sign changes
     323                 :             :    and type promotions have been peeled away.  */
     324                 :             : class vect_unpromoted_value {
     325                 :             : public:
     326                 :             :   vect_unpromoted_value ();
     327                 :             : 
     328                 :             :   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
     329                 :             : 
     330                 :             :   /* The value obtained after peeling away zero or more casts.  */
     331                 :             :   tree op;
     332                 :             : 
     333                 :             :   /* The type of OP.  */
     334                 :             :   tree type;
     335                 :             : 
     336                 :             :   /* The definition type of OP.  */
     337                 :             :   vect_def_type dt;
     338                 :             : 
     339                 :             :   /* If OP is the result of peeling at least one cast, and if the cast
     340                 :             :      of OP itself is a vectorizable statement, CASTER identifies that
     341                 :             :      statement, otherwise it is null.  */
     342                 :             :   stmt_vec_info caster;
     343                 :             : };
     344                 :             : 
     345                 :   236957787 : inline vect_unpromoted_value::vect_unpromoted_value ()
     346                 :   236957787 :   : op (NULL_TREE),
     347                 :   236957787 :     type (NULL_TREE),
     348                 :   236957787 :     dt (vect_uninitialized_def),
     349                 :     1887073 :     caster (NULL)
     350                 :             : {
     351                 :        3075 : }
     352                 :             : 
     353                 :             : /* Set the operand to OP_IN, its definition type to DT_IN, and the
     354                 :             :    statement that casts it to CASTER_IN.  */
     355                 :             : 
     356                 :             : inline void
     357                 :     8063812 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
     358                 :             :                                stmt_vec_info caster_in)
     359                 :             : {
     360                 :     8063812 :   op = op_in;
     361                 :     8063812 :   type = TREE_TYPE (op);
     362                 :     8063812 :   dt = dt_in;
     363                 :     8063812 :   caster = caster_in;
     364                 :     8063812 : }
     365                 :             : 
     366                 :             : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
     367                 :             :    to reach some vectorizable inner operand OP', continuing as long as it
     368                 :             :    is possible to convert OP' back to OP using a possible sign change
     369                 :             :    followed by a possible promotion P.  Return this OP', or null if OP is
     370                 :             :    not a vectorizable SSA name.  If there is a promotion P, describe its
     371                 :             :    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
     372                 :             :    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
     373                 :             :    have more than one user.
     374                 :             : 
     375                 :             :    A successful return means that it is possible to go from OP' to OP
     376                 :             :    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
     377                 :             :    whereas the cast from UNPROM to OP might be a promotion, a sign
     378                 :             :    change, or a nop.
     379                 :             : 
     380                 :             :    E.g. say we have:
     381                 :             : 
     382                 :             :        signed short *ptr = ...;
     383                 :             :        signed short C = *ptr;
     384                 :             :        unsigned short B = (unsigned short) C;    // sign change
     385                 :             :        signed int A = (signed int) B;            // unsigned promotion
     386                 :             :        ...possible other uses of A...
     387                 :             :        unsigned int OP = (unsigned int) A;       // sign change
     388                 :             : 
     389                 :             :    In this case it's possible to go directly from C to OP using:
     390                 :             : 
     391                 :             :        OP = (unsigned int) (unsigned short) C;
     392                 :             :             +------------+ +--------------+
     393                 :             :                promotion      sign change
     394                 :             : 
     395                 :             :    so OP' would be C.  The input to the promotion is B, so UNPROM
     396                 :             :    would describe B.  */
     397                 :             : 
     398                 :             : static tree
     399                 :     6043509 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
     400                 :             :                                       vect_unpromoted_value *unprom,
     401                 :             :                                       bool *single_use_p = NULL)
     402                 :             : {
     403                 :     6043509 :   tree op_type = TREE_TYPE (op);
     404                 :     6043509 :   if (!INTEGRAL_TYPE_P (op_type))
     405                 :             :     return NULL_TREE;
     406                 :             : 
     407                 :     6008649 :   tree res = NULL_TREE;
     408                 :     6008649 :   unsigned int orig_precision = TYPE_PRECISION (op_type);
     409                 :     6008649 :   unsigned int min_precision = orig_precision;
     410                 :     6008649 :   stmt_vec_info caster = NULL;
     411                 :     7298567 :   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
     412                 :             :     {
     413                 :             :       /* See whether OP is simple enough to vectorize.  */
     414                 :     7135738 :       stmt_vec_info def_stmt_info;
     415                 :     7135738 :       gimple *def_stmt;
     416                 :     7135738 :       vect_def_type dt;
     417                 :     7135738 :       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
     418                 :             :         break;
     419                 :             : 
     420                 :             :       /* If OP is the input of a demotion, skip over it to see whether
     421                 :             :          OP is itself the result of a promotion.  If so, the combined
     422                 :             :          effect of the promotion and the demotion might fit the required
     423                 :             :          pattern, otherwise neither operation fits.
     424                 :             : 
     425                 :             :          This copes with cases such as the result of an arithmetic
     426                 :             :          operation being truncated before being stored, and where that
     427                 :             :          arithmetic operation has been recognized as an over-widened one.  */
     428                 :     7130865 :       if (TYPE_PRECISION (op_type) <= min_precision)
     429                 :             :         {
     430                 :             :           /* Use OP as the UNPROM described above if we haven't yet
     431                 :             :              found a promotion, or if using the new input preserves the
     432                 :             :              sign of the previous promotion.  */
     433                 :     7012063 :           if (!res
     434                 :     1092263 :               || TYPE_PRECISION (unprom->type) == orig_precision
     435                 :     7046155 :               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
     436                 :             :             {
     437                 :     6981491 :               unprom->set_op (op, dt, caster);
     438                 :     6981491 :               min_precision = TYPE_PRECISION (op_type);
     439                 :             :             }
     440                 :             :           /* Stop if we've already seen a promotion and if this
     441                 :             :              conversion does more than change the sign.  */
     442                 :       30572 :           else if (TYPE_PRECISION (op_type)
     443                 :       30572 :                    != TYPE_PRECISION (unprom->type))
     444                 :             :             break;
     445                 :             : 
     446                 :             :           /* The sequence now extends to OP.  */
     447                 :             :           res = op;
     448                 :             :         }
     449                 :             : 
     450                 :             :       /* See whether OP is defined by a cast.  Record it as CASTER if
     451                 :             :          the cast is potentially vectorizable.  */
     452                 :     7130229 :       if (!def_stmt)
     453                 :             :         break;
     454                 :     6962012 :       caster = def_stmt_info;
     455                 :             : 
     456                 :             :       /* Ignore pattern statements, since we don't link uses for them.  */
     457                 :     6962012 :       if (caster
     458                 :     6962012 :           && single_use_p
     459                 :     1201747 :           && !STMT_VINFO_RELATED_STMT (caster)
     460                 :     8043923 :           && !has_single_use (res))
     461                 :      712425 :         *single_use_p = false;
     462                 :             : 
     463                 :    12807832 :       gassign *assign = dyn_cast <gassign *> (def_stmt);
     464                 :     4550444 :       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
     465                 :             :         break;
     466                 :             : 
     467                 :             :       /* Continue with the input to the cast.  */
     468                 :     1289918 :       op = gimple_assign_rhs1 (def_stmt);
     469                 :     1289918 :       op_type = TREE_TYPE (op);
     470                 :             :     }
     471                 :             :   return res;
     472                 :             : }
     473                 :             : 
     474                 :             : /* OP is an integer operand to an operation that returns TYPE, and we
     475                 :             :    want to treat the operation as a widening one.  So far we can treat
     476                 :             :    it as widening from *COMMON_TYPE.
     477                 :             : 
     478                 :             :    Return true if OP is suitable for such a widening operation,
     479                 :             :    either widening from *COMMON_TYPE or from some supertype of it.
     480                 :             :    Update *COMMON_TYPE to the supertype in the latter case.
     481                 :             : 
     482                 :             :    SHIFT_P is true if OP is a shift amount, in which case the amount
                                            itself is used as the precision that must be accommodated.

                                            Return false, leaving *COMMON_TYPE untouched, if OP would need
                                            more than half the precision of TYPE.  */
     483                 :             : 
     484                 :             : static bool
     485                 :      237790 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
     486                 :             :                             tree *common_type)
     487                 :             : {
     488                 :             :   /* Calculate the minimum precision required by OP, without changing
     489                 :             :      the sign of either operand.  */
     490                 :      237790 :   unsigned int precision;
     491                 :      237790 :   if (shift_p)
     492                 :             :     {
                                               /* A shift amount larger than half the precision of TYPE cannot
                                                  be handled; otherwise the amount is the precision we need.  */
     493                 :       11555 :       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
     494                 :             :         return false;
     495                 :        9260 :       precision = TREE_INT_CST_LOW (op);
     496                 :             :     }
     497                 :             :   else
     498                 :             :     {
                                               /* Measure OP in the signedness of the current *COMMON_TYPE and
                                                  require that twice that many bits still fit in TYPE.  */
     499                 :      226235 :       precision = wi::min_precision (wi::to_widest (op),
     500                 :      226235 :                                      TYPE_SIGN (*common_type));
     501                 :      226235 :       if (precision * 2 > TYPE_PRECISION (type))
     502                 :             :         return false;
     503                 :             :     }
     504                 :             : 
     505                 :             :   /* If OP requires a wider type, switch to that type.  The checks
     506                 :             :      above ensure that this is still narrower than the result.  */
                                           /* NOTE(review): vect_element_precision is defined outside this
                                              chunk; presumably it rounds PRECISION up to a supported element
                                              width -- confirm.  */
     507                 :      226129 :   precision = vect_element_precision (precision);
                                           /* The replacement type keeps the signedness of *COMMON_TYPE.  */
     508                 :      226129 :   if (TYPE_PRECISION (*common_type) < precision)
     509                 :        5083 :     *common_type = build_nonstandard_integer_type
     510                 :        5083 :       (precision, TYPE_UNSIGNED (*common_type));
     511                 :             :   return true;
     512                 :             : }
     513                 :             : 
     514                 :             : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
     515                 :             :    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.
                                            *COMMON_TYPE is left unchanged when the function returns false.  */
     516                 :             : 
     517                 :             : static bool
     518                 :       35885 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
     519                 :             : {
                                           /* Nothing to reconcile if the two types are already compatible.  */
     520                 :       35885 :   if (types_compatible_p (*common_type, new_type))
     521                 :             :     return true;
     522                 :             : 
     523                 :             :   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
     524                 :        6436 :   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
     525                 :        6436 :       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
     526                 :             :     return true;
     527                 :             : 
     528                 :             :   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
     529                 :        5791 :   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
     530                 :        5791 :       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
     531                 :             :     {
     532                 :         362 :       *common_type = new_type;
     533                 :         362 :       return true;
     534                 :             :     }
     535                 :             : 
     536                 :             :   /* We have mismatched signs, with the signed type being
     537                 :             :      no wider than the unsigned type.  In this case we need
     538                 :             :      a wider signed type.  */
                                           /* Try twice the precision of the wider of the two types.  */
     539                 :        5429 :   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
     540                 :             :                                 TYPE_PRECISION (new_type));
     541                 :        5429 :   precision *= 2;
     542                 :             : 
                                           /* The doubled precision must in turn be at most half the
                                              precision of TYPE.  */
     543                 :        5429 :   if (precision * 2 > TYPE_PRECISION (type))
     544                 :             :     return false;
     545                 :             : 
                                           /* Passing false as the second argument requests a signed type.  */
     546                 :          17 :   *common_type = build_nonstandard_integer_type (precision, false);
     547                 :          17 :   return true;
     548                 :             : }
     549                 :             : 
     550                 :             : /* Check whether STMT_INFO can be viewed as a tree of integer operations
     551                 :             :    in which each node either performs CODE or WIDENED_CODE, and where
     552                 :             :    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
     553                 :             :    specifies the maximum number of leaf operands.  SHIFT_P says whether
     554                 :             :    CODE and WIDENED_CODE are some sort of shift.
     555                 :             : 
     556                 :             :    If STMT_INFO is such a tree, return the number of leaf operands
     557                 :             :    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
     558                 :             :    to a type that (a) is narrower than the result of STMT_INFO and
     559                 :             :    (b) can hold all leaf operand values.
     560                 :             : 
     561                 :             :    If SUBTYPE is nonnull, also accept a pair of operands whose signs
     562                 :             :    vect_joust_widened_type cannot reconcile; *SUBTYPE is then set to
     563                 :             :    optab_vector_mixed_sign.
     564                 :             : 
     565                 :             :    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
     566                 :             :    exists.  */
     567                 :             : 
     568                 :             : static unsigned int
     569                 :   100120848 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
     570                 :             :                       code_helper widened_code, bool shift_p,
     571                 :             :                       unsigned int max_nops,
     572                 :             :                       vect_unpromoted_value *unprom, tree *common_type,
     573                 :             :                       enum optab_subtype *subtype = NULL)
     574                 :             : {
     575                 :             :   /* Check for an integer operation with the right code.  */
     576                 :   100120848 :   gimple* stmt = stmt_info->stmt;
     577                 :   100120848 :   if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
     578                 :             :     return 0;
     579                 :             : 
     580                 :    81582515 :   code_helper rhs_code;
                                           /* STMT is known to be an assignment or a call at this point, so
                                              the final `else' below cannot be reached.  */
     581                 :    81582515 :   if (is_gimple_assign (stmt))
     582                 :    69272546 :     rhs_code = gimple_assign_rhs_code (stmt);
     583                 :    12309969 :   else if (is_gimple_call (stmt))
     584                 :    12309969 :     rhs_code = gimple_call_combined_fn (stmt);
     585                 :             :   else
     586                 :             :     return 0;
     587                 :             : 
                                           /* Only nodes that perform CODE or WIDENED_CODE belong to the tree.  */
     588                 :    81582515 :   if (rhs_code != code
     589                 :    81582515 :       && rhs_code != widened_code)
     590                 :             :     return 0;
     591                 :             : 
     592                 :     4789965 :   tree lhs = gimple_get_lhs (stmt);
     593                 :     4789965 :   tree type = TREE_TYPE (lhs);
     594                 :     4789965 :   if (!INTEGRAL_TYPE_P (type))
     595                 :             :     return 0;
     596                 :             : 
     597                 :             :   /* Assume that both operands will be leaf operands.  */
                                           /* NOTE(review): MAX_NOPS is unsigned, so this relies on callers
                                              passing at least 2 -- confirm for callers outside this chunk.  */
     598                 :     4308003 :   max_nops -= 2;
     599                 :             : 
     600                 :             :   /* Check the operands.  */
     601                 :     4308003 :   unsigned int next_op = 0;
     602                 :     4912287 :   for (unsigned int i = 0; i < 2; ++i)
     603                 :             :     {
                                               /* Operand I is described in the next free slot of UNPROM.  */
     604                 :     4655361 :       vect_unpromoted_value *this_unprom = &unprom[next_op];
     605                 :     4655361 :       unsigned int nops = 1;
     606                 :     4655361 :       tree op = gimple_arg (stmt, i);
     607                 :     4655361 :       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
     608                 :             :         {
     609                 :             :           /* We already have a common type from earlier operands.
     610                 :             :              Update it to account for OP.  */
     611                 :      237790 :           this_unprom->set_op (op, vect_constant_def);
     612                 :      237790 :           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
     613                 :             :             return 0;
     614                 :             :         }
     615                 :             :       else
     616                 :             :         {
     617                 :             :           /* Only allow shifts by constants.  */
     618                 :     4417571 :           if (shift_p && i == 1)
     619                 :             :             return 0;
     620                 :             : 
     621                 :     4409226 :           if (rhs_code != code)
     622                 :             :             {
     623                 :             :               /* If rhs_code is widened_code, don't look through further
     624                 :             :                  possible promotions, there is a promotion already embedded
     625                 :             :                  in the WIDEN_*_EXPR.  */
     626                 :         130 :               if (TREE_CODE (op) != SSA_NAME
     627                 :         130 :                   || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
     628                 :           0 :                 return 0;
     629                 :             : 
     630                 :         130 :               stmt_vec_info def_stmt_info;
     631                 :         130 :               gimple *def_stmt;
     632                 :         130 :               vect_def_type dt;
     633                 :         130 :               if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
     634                 :             :                                        &def_stmt))
     635                 :             :                 return 0;
                                                       /* Record OP itself as the leaf, with no cast statement.  */
     636                 :         130 :               this_unprom->set_op (op, dt, NULL);
     637                 :             :             }
     638                 :     4409096 :           else if (!vect_look_through_possible_promotion (vinfo, op,
     639                 :             :                                                           this_unprom))
     640                 :             :             return 0;
     641                 :             : 
     642                 :     4322117 :           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
     643                 :             :             {
     644                 :             :               /* The operand isn't widened.  If STMT_INFO has the code
     645                 :             :                  for an unwidened operation, recursively check whether
     646                 :             :                  this operand is a node of the tree.  */
     647                 :     3934703 :               if (rhs_code != code
     648                 :     3934703 :                   || max_nops == 0
     649                 :     3935126 :                   || this_unprom->dt != vect_internal_def)
     650                 :             :                 return 0;
     651                 :             : 
     652                 :             :               /* Give back the leaf slot allocated above now that we're
     653                 :             :                  not treating this as a leaf operand.  */
     654                 :         423 :               max_nops += 1;
     655                 :             : 
     656                 :             :               /* Recursively process the definition of the operand.  */
                                                       /* THIS_UNPROM is passed as the UNPROM array, so the leaves of
                                                          the subtree are written starting at the current slot.  */
     657                 :         423 :               stmt_vec_info def_stmt_info
     658                 :         423 :                 = vinfo->lookup_def (this_unprom->op);
     659                 :         423 :               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
     660                 :             :                                            widened_code, shift_p, max_nops,
     661                 :             :                                            this_unprom, common_type,
     662                 :             :                                            subtype);
     663                 :         423 :               if (nops == 0)
     664                 :             :                 return 0;
     665                 :             : 
                                                       /* Charge the leaves found in the subtree to the budget.  */
     666                 :         287 :               max_nops -= nops;
     667                 :             :             }
     668                 :             :           else
     669                 :             :             {
     670                 :             :               /* Make sure that the operand is narrower than the result.  */
     671                 :      387414 :               if (TYPE_PRECISION (this_unprom->type) * 2
     672                 :      387414 :                   > TYPE_PRECISION (type))
     673                 :             :                 return 0;
     674                 :             : 
     675                 :             :               /* Update COMMON_TYPE for the new operand.  */
                                                       /* NOTE(review): the test is on I rather than NEXT_OP, so in a
                                                          recursive call reached from operand 1 of the parent this
                                                          assignment replaces the *COMMON_TYPE derived from the
                                                          parent's operand 0 -- confirm that this is intended.  */
     676                 :      383112 :               if (i == 0)
     677                 :      347227 :                 *common_type = this_unprom->type;
     678                 :       35885 :               else if (!vect_joust_widened_type (type, this_unprom->type,
     679                 :             :                                                  common_type))
     680                 :             :                 {
                                                           /* No common type was found and *COMMON_TYPE is unchanged.
                                                              This is only acceptable if the caller asked for SUBTYPE.  */
     681                 :        5412 :                   if (subtype)
     682                 :             :                     {
     683                 :             :                       /* See if we can sign extend the smaller type.  */
     684                 :         168 :                       if (TYPE_PRECISION (this_unprom->type)
     685                 :         168 :                           > TYPE_PRECISION (*common_type))
     686                 :          36 :                         *common_type = this_unprom->type;
     687                 :         168 :                       *subtype = optab_vector_mixed_sign;
     688                 :             :                     }
     689                 :             :                   else
     690                 :             :                     return 0;
     691                 :             :                 }
     692                 :             :             }
     693                 :             :         }
     694                 :      604284 :       next_op += nops;
     695                 :             :     }
     696                 :             :   return next_op;
     697                 :             : }
     698                 :             : 
     699                 :             : /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
     700                 :             :    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.
                                            The temporary is created by make_temp_ssa_name with the name
                                            "patt".  */
     701                 :             : 
     702                 :             : static tree
     703                 :     1235162 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
     704                 :             : {
     705                 :         148 :   return make_temp_ssa_name (type, stmt, "patt");
     706                 :             : }
     707                 :             : 
     708                 :             : /* STMT2_INFO describes a type conversion that could be split into STMT1
     709                 :             :    followed by a version of STMT2_INFO that takes NEW_RHS as its first
     710                 :             :    input.  Try to do this using pattern statements, returning true on
     711                 :             :    success.
                                         
                                            VECTYPE is the vector type passed to vect_init_pattern_stmt for
                                            STMT1.  The only failure case is when STMT2_INFO is not yet a
                                            pattern statement and no vector type exists for its result.  */
     712                 :             : 
     713                 :             : static bool
     714                 :       28169 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
     715                 :             :                       gimple *stmt1, tree vectype)
     716                 :             : {
     717                 :       28169 :   if (is_pattern_stmt_p (stmt2_info))
     718                 :             :     {
     719                 :             :       /* STMT2_INFO is part of a pattern.  Get the statement to which
     720                 :             :          the pattern is attached.  */
     721                 :         823 :       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
     722                 :         823 :       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
     723                 :             : 
                                               /* Dump the statement as it is before the rewrite below.  */
     724                 :         823 :       if (dump_enabled_p ())
     725                 :          24 :         dump_printf_loc (MSG_NOTE, vect_location,
     726                 :             :                          "Splitting pattern statement: %G", stmt2_info->stmt);
     727                 :             : 
     728                 :             :       /* Since STMT2_INFO is a pattern statement, we can change it
     729                 :             :          in-situ without worrying about changing the code for the
     730                 :             :          containing block.  */
     731                 :         823 :       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
     732                 :             : 
     733                 :         823 :       if (dump_enabled_p ())
     734                 :             :         {
     735                 :          24 :           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
     736                 :          24 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     737                 :             :                            stmt2_info->stmt);
     738                 :             :         }
     739                 :             : 
                                               /* STMT1 has to end up before STMT2_INFO's statement in the
                                                  pattern attached to ORIG_STMT2_INFO.  */
     740                 :         823 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
     741                 :         823 :       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
     742                 :             :         /* STMT2_INFO is the actual pattern statement.  Add STMT1
     743                 :             :            to the end of the definition sequence.  */
     744                 :         823 :         gimple_seq_add_stmt_without_update (def_seq, stmt1);
     745                 :             :       else
     746                 :             :         {
     747                 :             :           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
     748                 :             :              before it.  */
     749                 :           0 :           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
     750                 :           0 :           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
     751                 :             :         }
     752                 :         823 :       return true;
     753                 :             :     }
     754                 :             :   else
     755                 :             :     {
     756                 :             :       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     757                 :             :          two-statement pattern now.  */
     758                 :       27346 :       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
                                               /* The replacement for STMT2_INFO produces a value of the same
                                                  scalar type as its current result, so give up if that type
                                                  has no vector type.  */
     759                 :       27346 :       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
     760                 :       27346 :       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
     761                 :       27346 :       if (!lhs_vectype)
     762                 :             :         return false;
     763                 :             : 
     764                 :       27346 :       if (dump_enabled_p ())
     765                 :        2142 :         dump_printf_loc (MSG_NOTE, vect_location,
     766                 :             :                          "Splitting statement: %G", stmt2_info->stmt);
     767                 :             : 
     768                 :             :       /* Add STMT1 as a singleton pattern definition sequence.  */
     769                 :       27346 :       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
     770                 :       27346 :       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
     771                 :       27346 :       gimple_seq_add_stmt_without_update (def_seq, stmt1);
     772                 :             : 
     773                 :             :       /* Build the second of the two pattern statements.  */
                                               /* It is a NOP_EXPR conversion of NEW_RHS to LHS_TYPE, handed to
                                                  vect_set_pattern_stmt together with LHS_VECTYPE.  */
     774                 :       27346 :       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
     775                 :       27346 :       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
     776                 :       27346 :       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
     777                 :             : 
     778                 :       27346 :       if (dump_enabled_p ())
     779                 :             :         {
     780                 :        2142 :           dump_printf_loc (MSG_NOTE, vect_location,
     781                 :             :                            "into pattern statements: %G", stmt1);
     782                 :        2142 :           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
     783                 :             :                            (gimple *) new_stmt2);
     784                 :             :         }
     785                 :             : 
     786                 :       27346 :       return true;
     787                 :             :     }
     788                 :             : }
     789                 :             : 
     790                 :             : /* Look for the following pattern
     791                 :             :         X = x[i]
     792                 :             :         Y = y[i]
     793                 :             :         DIFF = X - Y
     794                 :             :         DAD = ABS_EXPR<DIFF>
     795                 :             : 
     796                 :             :    ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
     797                 :             :    HALF_TYPE and UNPROM will be set should the statement be found to
     798                 :             :    be a widened operation.
     799                 :             :    DIFF_STMT will be set to the MINUS_EXPR
     800                 :             :    statement that precedes the ABS_STMT unless vect_widened_op_tree
     801                 :             :    succeeds.
     802                 :             :  */
     803                 :             : static bool
     804                 :    17121825 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
     805                 :             :                                 tree *half_type,
     806                 :             :                                 vect_unpromoted_value unprom[2],
     807                 :             :                                 gassign **diff_stmt)
     808                 :             : {
     809                 :    17121825 :   if (!abs_stmt)
     810                 :             :     return false;
     811                 :             : 
     812                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     813                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     814                 :    17121825 :   enum tree_code code = gimple_assign_rhs_code (abs_stmt);
     815                 :    17121825 :   if (code != ABS_EXPR && code != ABSU_EXPR)
     816                 :             :     return false;
     817                 :             : 
     818                 :       19828 :   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
     819                 :       19828 :   tree abs_type = TREE_TYPE (abs_oprnd);
     820                 :       19828 :   if (!abs_oprnd)
     821                 :             :     return false;
     822                 :       14964 :   if (!ANY_INTEGRAL_TYPE_P (abs_type)
     823                 :        5074 :       || TYPE_OVERFLOW_WRAPS (abs_type)
     824                 :       24777 :       || TYPE_UNSIGNED (abs_type))
     825                 :             :     return false;
     826                 :             : 
     827                 :             :   /* Peel off conversions from the ABS input.  This can involve sign
     828                 :             :      changes (e.g. from an unsigned subtraction to a signed ABS input)
     829                 :             :      or signed promotion, but it can't include unsigned promotion.
     830                 :             :      (Note that ABS of an unsigned promotion should have been folded
     831                 :             :      away before now anyway.)  */
     832                 :        4949 :   vect_unpromoted_value unprom_diff;
     833                 :        4949 :   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
     834                 :             :                                                     &unprom_diff);
     835                 :        4949 :   if (!abs_oprnd)
     836                 :             :     return false;
     837                 :        4739 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
     838                 :        4739 :       && TYPE_UNSIGNED (unprom_diff.type))
     839                 :             :     return false;
     840                 :             : 
     841                 :             :   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
     842                 :        4739 :   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
     843                 :        4739 :   if (!diff_stmt_vinfo)
     844                 :             :     return false;
     845                 :             : 
     846                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     847                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
     848                 :        4454 :   if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
     849                 :             :                             MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
     850                 :             :                             false, 2, unprom, half_type))
     851                 :             :     return true;
     852                 :             : 
     853                 :             :   /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
     854                 :        3249 :   gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
     855                 :        3249 :   if (diff_stmt && diff
     856                 :        2610 :       && gimple_assign_rhs_code (diff) == MINUS_EXPR
     857                 :        4114 :       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
     858                 :             :     {
     859                 :         745 :       *diff_stmt = diff;
     860                 :         745 :       *half_type = NULL_TREE;
     861                 :         745 :       return true;
     862                 :             :     }
     863                 :             : 
     864                 :             :   return false;
     865                 :             : }
     866                 :             : 
     867                 :             : /* Convert UNPROM to TYPE and return the result, adding new statements
     868                 :             :    to STMT_INFO's pattern definition statements if no better way is
     869                 :             :    available.  VECTYPE is the vector form of TYPE.
     870                 :             : 
     871                 :             :    If SUBTYPE is optab_vector_mixed_sign, TYPE must be signed; when
                         :             :    UNPROM's operand is unsigned, the conversion instead targets the
                         :             :    unsigned forms of TYPE and VECTYPE.
                         :             : 
                         :             :    The result is UNPROM->op itself when its type is already compatible
                         :             :    with TYPE, a constant of type TYPE when UNPROM->op is an INTEGER_CST,
                         :             :    and otherwise either the result of an existing (possibly split) cast
                         :             :    or a new temporary defined by a NOP_EXPR conversion.  */
     872                 :             : 
     873                 :             : static tree
     874                 :      374873 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     875                 :             :                     vect_unpromoted_value *unprom, tree vectype,
     876                 :             :                     enum optab_subtype subtype = optab_default)
     877                 :             : {
     878                 :             :   /* Update the type if the signs differ.  */
     879                 :      374873 :   if (subtype == optab_vector_mixed_sign)
     880                 :             :     {
     881                 :          96 :       gcc_assert (!TYPE_UNSIGNED (type));
     882                 :          96 :       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
     883                 :             :         {
     884                 :          48 :           type = unsigned_type_for (type);
     885                 :          48 :           vectype = unsigned_type_for (vectype);
     886                 :             :         }
     887                 :             :     }
     888                 :             : 
     889                 :             :   /* Check for a no-op conversion.  */
     890                 :      374873 :   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     891                 :      126732 :     return unprom->op;
     892                 :             : 
     893                 :             :   /* Allow the caller to create constant vect_unpromoted_values.  */
     894                 :      248141 :   if (TREE_CODE (unprom->op) == INTEGER_CST)
     895                 :      148325 :     return wide_int_to_tree (type, wi::to_widest (unprom->op));
     896                 :             : 
     897                 :       99816 :   tree input = unprom->op;
     898                 :       99816 :   if (unprom->caster)
     899                 :             :     {
     900                 :       48615 :       tree lhs = gimple_get_lhs (unprom->caster->stmt);
     901                 :       48615 :       tree lhs_type = TREE_TYPE (lhs);
     902                 :             : 
     903                 :             :       /* If the result of the existing cast is the right width, use it
     904                 :             :          instead of the source of the cast.  */
     905                 :       48615 :       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
     906                 :             :         input = lhs;
     907                 :             :       /* If the precision we want is between the source and result
     908                 :             :          precisions of the existing cast, try splitting the cast into
     909                 :             :          two and tapping into a mid-way point.  This is only attempted
                         :             :          when the mid-way type has a vector form; if
                         :             :          vect_split_statement returns false, the mid-way conversion is
                         :             :          added to the pattern definition statements instead.  */
     910                 :       46924 :       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
     911                 :       46924 :                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
     912                 :             :         {
     913                 :             :           /* In order to preserve the semantics of the original cast,
     914                 :             :              give the mid-way point the same signedness as the input value.
     915                 :             : 
     916                 :             :              It would be possible to use a signed type here instead if
     917                 :             :              TYPE is signed and UNPROM->TYPE is unsigned, but that would
     918                 :             :              make the sign of the midtype sensitive to the order in
     919                 :             :              which we process the statements, since the signedness of
     920                 :             :              TYPE is the signedness required by just one of possibly
     921                 :             :              many users.  Also, unsigned promotions are usually as cheap
     922                 :             :              as or cheaper than signed ones, so it's better to keep an
     923                 :             :              unsigned promotion.  */
     924                 :       28169 :           tree midtype = build_nonstandard_integer_type
     925                 :       28169 :             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
     926                 :       28169 :           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
     927                 :       28169 :           if (vec_midtype)
     928                 :             :             {
     929                 :       28169 :               input = vect_recog_temp_ssa_var (midtype, NULL);
     930                 :       28169 :               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
     931                 :             :                                                        unprom->op);
     932                 :       28169 :               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
     933                 :             :                                          vec_midtype))
     934                 :           0 :                 append_pattern_def_seq (vinfo, stmt_info,
     935                 :             :                                         new_stmt, vec_midtype);
     936                 :             :             }
     937                 :             :         }
     938                 :             : 
     939                 :             :       /* See if we can reuse an existing result.  */
     940                 :       48615 :       if (types_compatible_p (type, TREE_TYPE (input)))
     941                 :             :         return input;
     942                 :             :     }
     943                 :             : 
     944                 :             :   /* We need a new conversion statement.  */
     945                 :       78483 :   tree new_op = vect_recog_temp_ssa_var (type, NULL);
     946                 :       78483 :   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
     947                 :             : 
     948                 :             :   /* If OP is an external value, see if we can insert the new statement
     949                 :             :      on an incoming edge.  The insertion is asserted not to have created
                         :             :      a new basic block.  */
     950                 :       78483 :   if (input == unprom->op && unprom->dt == vect_external_def)
     951                 :        5076 :     if (edge e = vect_get_external_def_edge (vinfo, input))
     952                 :             :       {
     953                 :         481 :         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
     954                 :         481 :         gcc_assert (!new_bb);
     955                 :             :         return new_op;
     956                 :             :       }
     957                 :             : 
     958                 :             :   /* As a (common) last resort, add the statement to the pattern itself.  */
     959                 :       78002 :   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
     960                 :       78002 :   return new_op;
     961                 :             : }
     962                 :             : 
     963                 :             : /* Invoke vect_convert_input for N elements of UNPROM and store the
     964                 :             :    result in the corresponding elements of RESULT.  TYPE, VECTYPE and
                         :             :    SUBTYPE are passed through to vect_convert_input unchanged.
     965                 :             : 
     966                 :             :    An element whose operand is the same as that of an earlier element
                         :             :    reuses the earlier element's result instead of being converted
                         :             :    again.  */
     967                 :             : 
     968                 :             : static void
     969                 :      190428 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
     970                 :             :                      tree *result, tree type, vect_unpromoted_value *unprom,
     971                 :             :                      tree vectype, enum optab_subtype subtype = optab_default)
     972                 :             : {
     973                 :      564837 :   for (unsigned int i = 0; i < n; ++i)
     974                 :             :     {
     975                 :             :       unsigned int j;
     976                 :      558204 :       for (j = 0; j < i; ++j)
     977                 :      183981 :         if (unprom[j].op == unprom[i].op)
     978                 :             :           break;
     979                 :             : 
     980                 :      374409 :       if (j < i)
     981                 :         186 :         result[i] = result[j];
     982                 :             :       else
     983                 :      374223 :         result[i] = vect_convert_input (vinfo, stmt_info,
     984                 :      374223 :                                         type, &unprom[i], vectype, subtype);
     985                 :             :     }
     986                 :      190428 : }
     987                 :             : 
     988                 :             : /* The caller has created a (possibly empty) sequence of pattern definition
     989                 :             :    statements followed by a single statement PATTERN_STMT.  Cast the result
     990                 :             :    of this final statement to TYPE.  If a new statement is needed, add
     991                 :             :    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
     992                 :             :    and return the new statement, otherwise return PATTERN_STMT as-is.
     993                 :             :    VECITYPE is the vector form of PATTERN_STMT's result type.
                         :             : 
                         :             :    A new statement is needed when TYPE is not compatible with the type
                         :             :    of PATTERN_STMT's lhs.  It is a NOP_EXPR conversion of that lhs to a
                         :             :    fresh temporary of type TYPE and is returned without itself being
                         :             :    added to the pattern definition statements.  */
     994                 :             : 
     995                 :             : static gimple *
     996                 :      214526 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
     997                 :             :                      gimple *pattern_stmt, tree vecitype)
     998                 :             : {
     999                 :      214526 :   tree lhs = gimple_get_lhs (pattern_stmt);
    1000                 :      214526 :   if (!types_compatible_p (type, TREE_TYPE (lhs)))
    1001                 :             :     {
    1002                 :      195139 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
    1003                 :      195139 :       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
    1004                 :      195139 :       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    1005                 :             :     }
    1006                 :      214526 :   return pattern_stmt;
    1007                 :             : }
    1008                 :             : 
    1009                 :             : /* Return true if STMT_INFO describes a reduction for which reassociation
    1010                 :             :    is allowed.  If STMT_INFO is part of a group, assume that it's part of
    1011                 :             :    a reduction chain and optimistically assume that all statements
    1012                 :             :    except the last allow reassociation.
    1013                 :             :    Also require it to have code CODE and to be a reduction
    1014                 :             :    in the outermost loop.  When returning true, store the operands in
    1015                 :             :    *OP0_OUT and *OP1_OUT.
                         :             : 
                         :             :    Return false if VINFO is not a loop_vec_info or if STMT_INFO's
                         :             :    statement is not an assignment.  If CODE is commutative and
                         :             :    STMT_VINFO_REDUC_IDX (STMT_INFO) is 0, the two operands are swapped
                         :             :    before being stored (presumably so that the reduction operand always
                         :             :    ends up in *OP1_OUT; confirm against the callers).  */
    1016                 :             : 
    1017                 :             : static bool
    1018                 :    74886061 : vect_reassociating_reduction_p (vec_info *vinfo,
    1019                 :             :                                 stmt_vec_info stmt_info, tree_code code,
    1020                 :             :                                 tree *op0_out, tree *op1_out)
    1021                 :             : {
    1022                 :    74886061 :   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
    1023                 :     8658256 :   if (!loop_info)
    1024                 :             :     return false;
    1025                 :             : 
    1026                 :     8658256 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    1027                 :     9419749 :   if (!assign || gimple_assign_rhs_code (assign) != code)
    1028                 :             :     return false;
    1029                 :             : 
    1030                 :             :   /* We don't allow changing the order of the computation in the inner-loop
    1031                 :             :      when doing outer-loop vectorization.  */
    1032                 :     1864365 :   class loop *loop = LOOP_VINFO_LOOP (loop_info);
    1033                 :    76649518 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1034                 :             :     return false;
    1035                 :             : 
    1036                 :     1821081 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    1037                 :             :     {
    1038                 :      102853 :       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
    1039                 :             :                                        code))
    1040                 :             :         return false;
    1041                 :             :     }
    1042                 :     1718228 :   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    1043                 :             :     return false;
    1044                 :             : 
    1045                 :      100908 :   *op0_out = gimple_assign_rhs1 (assign);
    1046                 :      100908 :   *op1_out = gimple_assign_rhs2 (assign);
    1047                 :      100908 :   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    1048                 :       38417 :     std::swap (*op0_out, *op1_out);
    1049                 :             :   return true;
    1050                 :             : }
    1051                 :             : 
    1052                 :             : /* match.pd function to match
    1053                 :             :    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
    1054                 :             :    with conditions:
    1055                 :             :    1) @1, @2, c, d, a, b are all of integral type.
    1056                 :             :    2) @1 and @2 each have a single use.
    1057                 :             :    3) a and c have the same precision.
    1058                 :             :    4) c and @1 have different precisions.
    1059                 :             :    5) c and d have the same type, or they may differ in sign when the
    1060                 :             :    convert is a truncation.
    1061                 :             : 
    1062                 :             :    On a match, record a, c, d and @3.  */
    1063                 :             : 
    1064                 :             : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
    1065                 :             : 
    1066                 :             : /* Function vect_recog_cond_expr_convert_pattern
    1067                 :             : 
    1068                 :             :    Try to find the following pattern:
    1069                 :             : 
    1070                 :             :    TYPE_AB A,B;
    1071                 :             :    TYPE_CD C,D;
    1072                 :             :    TYPE_E E;
    1073                 :             :    TYPE_E op_true = (TYPE_E) A;
    1074                 :             :    TYPE_E op_false = (TYPE_E) B;
    1075                 :             : 
    1076                 :             :    E = C cmp D ? op_true : op_false;
    1077                 :             : 
    1078                 :             :    where
    1079                 :             :    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
    1080                 :             :    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
    1081                 :             :    single_use of op_true and op_false.
    1082                 :             :    The types of A and B may differ in sign when (TYPE_E) A is a truncation.
    1083                 :             : 
    1084                 :             :    Input:
    1085                 :             : 
    1086                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1087                 :             :    Here it starts with E = C cmp D ? op_true : op_false;
    1088                 :             : 
    1089                 :             :    Output:
    1090                 :             : 
    1091                 :             :    TYPE_AB E' = C cmp D ? A : B;
    1092                 :             :    TYPE_E E = (TYPE_E) E';
    1093                 :             : 
    1094                 :             :    E' has the type of A.  There may be an extra nop_convert of B to that
                         :             :    type to handle different signedness.
    1095                 :             : 
    1096                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1097                 :             : 
    1098                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1099                 :             :    stmts that constitute the pattern.  In this case it will be:
    1100                 :             :    E = (TYPE_E) E';
    1101                 :             :    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
    1102                 :             : 
    1103                 :             : static gimple *
    1104                 :    25017179 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
    1105                 :             :                                       stmt_vec_info stmt_vinfo, tree *type_out)
    1106                 :             : {
    1107                 :    42221761 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    1108                 :    17204625 :   tree lhs, match[4], temp, type, new_lhs, op2;
    1109                 :    17204625 :   gimple *cond_stmt;
    1110                 :    17204625 :   gimple *pattern_stmt;
    1111                 :             : 
    1112                 :    17204625 :   if (!last_stmt)
    1113                 :             :     return NULL;
    1114                 :             : 
    1115                 :    17204625 :   lhs = gimple_assign_lhs (last_stmt);
    1116                 :             : 
    1117                 :             :   /* Find E = C cmp D ? (TYPE_E) A : (TYPE_E) B;
    1118                 :             :      TYPE_PRECISION (A) == TYPE_PRECISION (C).
                         :             :      Below, MATCH[1] and MATCH[2] are used as the true and false values
                         :             :      and MATCH[3] as the condition of the new COND_EXPR.  */
    1119                 :    17204625 :   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    1120                 :             :     return NULL;
    1121                 :             : 
    1122                 :          43 :   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
    1123                 :             : 
    1124                 :          43 :   op2 = match[2];
    1125                 :          43 :   type = TREE_TYPE (match[1]);
    1126                 :          43 :   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    1127                 :             :     {
    1128                 :           3 :       op2 = vect_recog_temp_ssa_var (type, NULL);
    1129                 :           3 :       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
    1130                 :           3 :       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
    1131                 :             :                               get_vectype_for_scalar_type (vinfo, type));
    1132                 :             :     }
    1133                 :             : 
    1134                 :          43 :   temp = vect_recog_temp_ssa_var (type, NULL);
    1135                 :          43 :   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
    1136                 :             :                                                  match[1], op2));
    1137                 :          43 :   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
    1138                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    1139                 :          43 :   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    1140                 :          43 :   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
    1141                 :          43 :   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
    1142                 :             : 
    1143                 :          43 :   if (dump_enabled_p ())
    1144                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1145                 :             :                      "created pattern stmt: %G", pattern_stmt);
    1146                 :             :   return pattern_stmt;
    1147                 :             : }
    1148                 :             : 
    1149                 :             : /* Function vect_recog_dot_prod_pattern
    1150                 :             : 
    1151                 :             :    Try to find the following pattern:
    1152                 :             : 
    1153                 :             :      type1a x_t
    1154                 :             :      type1b y_t;
    1155                 :             :      TYPE1 prod;
    1156                 :             :      TYPE2 sum = init;
    1157                 :             :    loop:
    1158                 :             :      sum_0 = phi <init, sum_1>
    1159                 :             :      S1  x_t = ...
    1160                 :             :      S2  y_t = ...
    1161                 :             :      S3  x_T = (TYPE1) x_t;
    1162                 :             :      S4  y_T = (TYPE1) y_t;
    1163                 :             :      S5  prod = x_T * y_T;
    1164                 :             :      [S6  prod = (TYPE2) prod;  #optional]
    1165                 :             :      S7  sum_1 = prod + sum_0;
    1166                 :             : 
    1167                 :             :    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
    1168                 :             :    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
    1169                 :             :    'type1a' and 'type1b' can differ.
    1170                 :             : 
    1171                 :             :    Input:
    1172                 :             : 
    1173                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1174                 :             :    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
    1175                 :             :    will be detected.
    1176                 :             : 
    1177                 :             :    Output:
    1178                 :             : 
    1179                 :             :    * TYPE_OUT: The type of the output  of this pattern.
    1180                 :             : 
    1181                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1182                 :             :    stmts that constitute the pattern. In this case it will be:
    1183                 :             :         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
    1184                 :             : 
    1185                 :             :    Note: The dot-prod idiom is a widening reduction pattern that is
    1186                 :             :          vectorized without preserving all the intermediate results. It
    1187                 :             :          produces only N/2 (widened) results (by summing up pairs of
    1188                 :             :          intermediate results) rather than all N results.  Therefore, we
    1189                 :             :          cannot allow this pattern when we want to get all the results and in
    1190                 :             :          the correct order (as is the case when this computation is in an
    1191                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    1192                 :             : 
    1193                 :             : static gimple *
    1194                 :    24962196 : vect_recog_dot_prod_pattern (vec_info *vinfo,
    1195                 :             :                              stmt_vec_info stmt_vinfo, tree *type_out)
    1196                 :             : {
    1197                 :    24962196 :   tree oprnd0, oprnd1;
    1198                 :    24962196 :   gimple *last_stmt = stmt_vinfo->stmt;
    1199                 :    24962196 :   tree type, half_type;
    1200                 :    24962196 :   gimple *pattern_stmt;
    1201                 :    24962196 :   tree var;
    1202                 :             : 
    1203                 :             :   /* Look for the following pattern
    1204                 :             :           DX = (TYPE1) X;
    1205                 :             :           DY = (TYPE1) Y;
    1206                 :             :           DPROD = DX * DY;
    1207                 :             :           DDPROD = (TYPE2) DPROD;
    1208                 :             :           sum_1 = DDPROD + sum_0;
    1209                 :             :      In which
    1210                 :             :      - DX is double the size of X
    1211                 :             :      - DY is double the size of Y
    1212                 :             :      - DX, DY, DPROD all have the same type but the sign
    1213                 :             :        between X, Y and DPROD can differ.
    1214                 :             :      - sum is the same size of DPROD or bigger
    1215                 :             :      - sum has been recognized as a reduction variable.
    1216                 :             : 
    1217                 :             :      This is equivalent to:
    1218                 :             :        DPROD = X w* Y;          #widen mult
    1219                 :             :        sum_1 = DPROD w+ sum_0;  #widen summation
    1220                 :             :      or
    1221                 :             :        DPROD = X w* Y;          #widen mult
    1222                 :             :        sum_1 = DPROD + sum_0;   #summation
    1223                 :             :    */
    1224                 :             : 
    1225                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1226                 :             :      of the above pattern.  */
    1227                 :             : 
    1228                 :    24962196 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1229                 :             :                                        &oprnd0, &oprnd1))
    1230                 :             :     return NULL;
    1231                 :             : 
    1232                 :       33819 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1233                 :             : 
    1234                 :       33819 :   vect_unpromoted_value unprom_mult;
    1235                 :       33819 :   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
    1236                 :             : 
    1237                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1238                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    1239                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    1240                 :             :      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
    1241                 :       33819 :   if (!oprnd0)
    1242                 :             :     return NULL;
    1243                 :             : 
    1244                 :       22089 :   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
    1245                 :       22089 :   if (!mult_vinfo)
    1246                 :             :     return NULL;
    1247                 :             : 
    1248                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
    1249                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1250                 :       64188 :   vect_unpromoted_value unprom0[2];
    1251                 :       21396 :   enum optab_subtype subtype = optab_vector;
    1252                 :       21396 :   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
    1253                 :             :                              false, 2, unprom0, &half_type, &subtype))
    1254                 :             :     return NULL;
    1255                 :             : 
    1256                 :             :   /* If there are two widening operations, make sure they agree on the sign
    1257                 :             :      of the extension.  The result of an optab_vector_mixed_sign operation
    1258                 :             :      is signed; otherwise, the result has the same sign as the operands.  */
    1259                 :         600 :   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
    1260                 :         977 :       && (subtype == optab_vector_mixed_sign
    1261                 :         377 :           ? TYPE_UNSIGNED (unprom_mult.type)
    1262                 :         215 :           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    1263                 :             :     return NULL;
    1264                 :             : 
    1265                 :         492 :   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
    1266                 :             : 
    1267                 :             :   /* If the inputs have mixed signs, canonicalize on using the signed
    1268                 :             :      input type for analysis.  This also helps when emulating mixed-sign
    1269                 :             :      operations using signed operations.  */
    1270                 :         492 :   if (subtype == optab_vector_mixed_sign)
    1271                 :         108 :     half_type = signed_type_for (half_type);
    1272                 :             : 
    1273                 :         492 :   tree half_vectype;
    1274                 :         492 :   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
    1275                 :             :                                         type_out, &half_vectype, subtype))
    1276                 :             :     {
    1277                 :             :       /* We can emulate a mixed-sign dot-product using a sequence of
    1278                 :             :          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
    1279                 :         362 :       if (subtype != optab_vector_mixed_sign
    1280                 :         362 :           || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
    1281                 :             :                                                DOT_PROD_EXPR, half_type,
    1282                 :             :                                                type_out, &half_vectype,
    1283                 :             :                                                optab_vector))
    1284                 :         326 :         return NULL;
    1285                 :             : 
    1286                 :          36 :       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
    1287                 :             :                                                *type_out);
    1288                 :             :     }
    1289                 :             : 
    1290                 :             :   /* Get the inputs in the appropriate types.  */
    1291                 :         166 :   tree mult_oprnd[2];
    1292                 :         166 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
    1293                 :             :                        unprom0, half_vectype, subtype);
    1294                 :             : 
    1295                 :         166 :   var = vect_recog_temp_ssa_var (type, NULL);
    1296                 :         166 :   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
    1297                 :             :                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
    1298                 :             : 
    1299                 :         166 :   return pattern_stmt;
    1300                 :             : }
    1301                 :             : 
    1302                 :             : 
    1303                 :             : /* Function vect_recog_sad_pattern
    1304                 :             : 
    1305                 :             :    Try to find the following Sum of Absolute Difference (SAD) pattern:
    1306                 :             : 
    1307                 :             :      type x_t, y_t;
    1308                 :             :      signed TYPE1 diff, abs_diff;
    1309                 :             :      TYPE2 sum = init;
    1310                 :             :    loop:
    1311                 :             :      sum_0 = phi <init, sum_1>
    1312                 :             :      S1  x_t = ...
    1313                 :             :      S2  y_t = ...
    1314                 :             :      S3  x_T = (TYPE1) x_t;
    1315                 :             :      S4  y_T = (TYPE1) y_t;
    1316                 :             :      S5  diff = x_T - y_T;
    1317                 :             :      S6  abs_diff = ABS_EXPR <diff>;
    1318                 :             :      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
    1319                 :             :      S8  sum_1 = abs_diff + sum_0;
    1320                 :             : 
    1321                 :             :    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
    1322                 :             :    same size as 'TYPE1' or bigger. This is a special case of a reduction
    1323                 :             :    computation.
    1324                 :             : 
    1325                 :             :    Input:
    1326                 :             : 
    1327                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.  In the
    1328                 :             :    example, when this function is called with S8, the pattern
    1329                 :             :    {S3,S4,S5,S6,S7,S8} will be detected.
    1330                 :             : 
    1331                 :             :    Output:
    1332                 :             : 
    1333                 :             :    * TYPE_OUT: The type of the output of this pattern.
    1334                 :             : 
    1335                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1336                 :             :    stmts that constitute the pattern. In this case it will be:
    1337                 :             :         SAD_EXPR <x_t, y_t, sum_0>
    1338                 :             :   */
    1339                 :             : 
    1340                 :             : static gimple *
    1341                 :    24962041 : vect_recog_sad_pattern (vec_info *vinfo,
    1342                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1343                 :             : {
    1344                 :    24962041 :   gimple *last_stmt = stmt_vinfo->stmt;
    1345                 :    24962041 :   tree half_type;
    1346                 :             : 
    1347                 :             :   /* Look for the following pattern
    1348                 :             :           DX = (TYPE1) X;
    1349                 :             :           DY = (TYPE1) Y;
    1350                 :             :           DDIFF = DX - DY;
    1351                 :             :           DAD = ABS_EXPR <DDIFF>;
    1352                 :             :           DAD = (TYPE2) DAD;       #optional
    1353                 :             :           sum_1 = DAD + sum_0;
    1354                 :             :      In which
    1355                 :             :      - DX is at least double the size of X
    1356                 :             :      - DY is at least double the size of Y
    1357                 :             :      - DX, DY, DDIFF, DAD all have the same type
    1358                 :             :      - sum is the same size as DAD or bigger
    1359                 :             :      - sum has been recognized as a reduction variable.
    1360                 :             : 
    1361                 :             :      This is equivalent to:
    1362                 :             :        DDIFF = X w- Y;          #widen sub
    1363                 :             :        DAD = ABS_EXPR <DDIFF>;
    1364                 :             :        sum_1 = DAD w+ sum_0;    #widen summation
    1365                 :             :      or
    1366                 :             :        DDIFF = X w- Y;          #widen sub
    1367                 :             :        DAD = ABS_EXPR <DDIFF>;
    1368                 :             :        sum_1 = DAD + sum_0;     #summation
    1369                 :             :    */
    1370                 :             : 
    1371                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    1372                 :             :      of the above pattern.  */
    1373                 :             : 
    1374                 :    24962041 :   tree plus_oprnd0, plus_oprnd1;
    1375                 :    24962041 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    1376                 :             :                                        &plus_oprnd0, &plus_oprnd1))
    1377                 :             :     return NULL;
    1378                 :             : 
    1379                 :       33653 :   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1380                 :             : 
    1381                 :             :   /* Any non-truncating sequence of conversions is OK here, since
    1382                 :             :      with a successful match, the result of the ABS(U) is known to fit
    1383                 :             :      within the nonnegative range of the result type.  (It cannot be the
    1384                 :             :      negative of the minimum signed value due to the range of the widening
    1385                 :             :      MINUS_EXPR.)  */
    1386                 :       33653 :   vect_unpromoted_value unprom_abs;
    1387                 :       33653 :   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
    1388                 :             :                                                       &unprom_abs);
    1389                 :             : 
    1390                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    1391                 :             :      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
    1392                 :             :      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
    1393                 :             :      Then check that plus_oprnd0 is defined by an abs_expr.  */
    1394                 :             : 
    1395                 :       33653 :   if (!plus_oprnd0)
    1396                 :             :     return NULL;
    1397                 :             : 
    1398                 :       21923 :   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
    1399                 :       21923 :   if (!abs_stmt_vinfo)
    1400                 :             :     return NULL;
    1401                 :             : 
    1402                 :             :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
    1403                 :             :      inside the loop (in case we are analyzing an outer-loop).  */
    1404                 :       21230 :   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
    1405                 :       63690 :   vect_unpromoted_value unprom[2];
    1406                 :             : 
    1407                 :       21230 :   if (!abs_stmt)
    1408                 :             :     {
    1409                 :    24962107 :       gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
    1410                 :         283 :       if (!abd_stmt
    1411                 :         283 :           || !gimple_call_internal_p (abd_stmt)
    1412                 :           0 :           || gimple_call_num_args (abd_stmt) != 2)
    1413                 :             :         return NULL;
    1414                 :             : 
    1415                 :           0 :       tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1416                 :           0 :       tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1417                 :             : 
    1418                 :           0 :       if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
    1419                 :             :         {
    1420                 :           0 :           if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
    1421                 :             :                                                      &unprom[0])
    1422                 :           0 :               || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
    1423                 :             :                                                         &unprom[1]))
    1424                 :           0 :             return NULL;
    1425                 :             :         }
    1426                 :           0 :       else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
    1427                 :             :         {
    1428                 :           0 :           unprom[0].op = abd_oprnd0;
    1429                 :           0 :           unprom[0].type = TREE_TYPE (abd_oprnd0);
    1430                 :           0 :           unprom[1].op = abd_oprnd1;
    1431                 :           0 :           unprom[1].type = TREE_TYPE (abd_oprnd1);
    1432                 :             :         }
    1433                 :             :       else
    1434                 :             :         return NULL;
    1435                 :             : 
    1436                 :           0 :       half_type = unprom[0].type;
    1437                 :             :     }
    1438                 :       20892 :   else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
    1439                 :             :                                             unprom, NULL))
    1440                 :             :     return NULL;
    1441                 :             : 
    1442                 :         358 :   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
    1443                 :             : 
    1444                 :         358 :   tree half_vectype;
    1445                 :         358 :   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
    1446                 :             :                                         type_out, &half_vectype))
    1447                 :             :     return NULL;
    1448                 :             : 
    1449                 :             :   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
    1450                 :         217 :   tree sad_oprnd[2];
    1451                 :         217 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
    1452                 :             :                        unprom, half_vectype);
    1453                 :             : 
    1454                 :         217 :   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
    1455                 :         217 :   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
    1456                 :             :                                               sad_oprnd[1], plus_oprnd1);
    1457                 :             : 
    1458                 :         217 :   return pattern_stmt;
    1459                 :             : }
    1460                 :             : 
    1461                 :             : /* Function vect_recog_abd_pattern
    1462                 :             : 
    1463                 :             :    Try to find the following ABsolute Difference (ABD) or
    1464                 :             :    widening ABD (WIDEN_ABD) pattern:
    1465                 :             : 
    1466                 :             :    TYPE1 x;
    1467                 :             :    TYPE2 y;
    1468                 :             :    TYPE3 x_cast = (TYPE3) x;              // widening or no-op
    1469                 :             :    TYPE3 y_cast = (TYPE3) y;              // widening or no-op
    1470                 :             :    TYPE3 diff = x_cast - y_cast;
    1471                 :             :    TYPE4 diff_cast = (TYPE4) diff;        // widening or no-op
    1472                 :             :    TYPE5 abs = ABS(U)_EXPR <diff_cast>;
    1473                 :             : 
    1474                 :             :    WIDEN_ABD exists to optimize the case where TYPE4 is at least
    1475                 :             :    twice as wide as TYPE3.
    1476                 :             : 
    1477                 :             :    Input:
    1478                 :             : 
    1479                 :             :    * STMT_VINFO: The stmt from which the pattern search begins
    1480                 :             : 
    1481                 :             :    Output:
    1482                 :             : 
    1483                 :             :    * TYPE_OUT: The type of the output of this pattern
    1484                 :             : 
    1485                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1486                 :             :      stmts that constitute the pattern, principally:
    1487                 :             :         out = IFN_ABD (x, y)
    1488                 :             :         out = IFN_VEC_WIDEN_ABD (x, y)
    1489                 :             :  */
    1490                 :             : 
    1491                 :             : static gimple *
    1492                 :    24913365 : vect_recog_abd_pattern (vec_info *vinfo,
    1493                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    1494                 :             : {
    1495                 :    42014298 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1496                 :    17100933 :   if (!last_stmt)
    1497                 :             :     return NULL;
    1498                 :             : 
    1499                 :    17100933 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1500                 :             : 
    1501                 :    51302799 :   vect_unpromoted_value unprom[2];
    1502                 :    17100933 :   gassign *diff_stmt;
    1503                 :    17100933 :   tree half_type;
    1504                 :    17100933 :   if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
    1505                 :             :                                        unprom, &diff_stmt))
    1506                 :             :     return NULL;
    1507                 :             : 
    1508                 :        1592 :   tree abd_in_type, abd_out_type;
    1509                 :             : 
    1510                 :        1592 :   if (half_type)
    1511                 :             :     {
    1512                 :             :       abd_in_type = half_type;
    1513                 :             :       abd_out_type = abd_in_type;
    1514                 :             :     }
    1515                 :             :   else
    1516                 :             :     {
    1517                 :         745 :       unprom[0].op = gimple_assign_rhs1 (diff_stmt);
    1518                 :         745 :       unprom[1].op = gimple_assign_rhs2 (diff_stmt);
    1519                 :         745 :       abd_in_type = signed_type_for (out_type);
    1520                 :         745 :       abd_out_type = abd_in_type;
    1521                 :             :     }
    1522                 :             : 
    1523                 :        1592 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
    1524                 :        1592 :   if (!vectype_in)
    1525                 :             :     return NULL;
    1526                 :             : 
    1527                 :        1384 :   internal_fn ifn = IFN_ABD;
    1528                 :        1384 :   tree vectype_out = vectype_in;
    1529                 :             : 
    1530                 :        1384 :   if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
    1531                 :        1384 :       && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
    1532                 :             :     {
    1533                 :         740 :       tree mid_type
    1534                 :         740 :         = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
    1535                 :         740 :                                           TYPE_UNSIGNED (abd_in_type));
    1536                 :         740 :       tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
    1537                 :             : 
    1538                 :         740 :       code_helper dummy_code;
    1539                 :         740 :       int dummy_int;
    1540                 :         740 :       auto_vec<tree> dummy_vec;
    1541                 :         740 :       if (mid_vectype
    1542                 :         740 :           && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
    1543                 :             :                                              stmt_vinfo, mid_vectype,
    1544                 :             :                                              vectype_in,
    1545                 :             :                                              &dummy_code, &dummy_code,
    1546                 :             :                                              &dummy_int, &dummy_vec))
    1547                 :             :         {
    1548                 :           0 :           ifn = IFN_VEC_WIDEN_ABD;
    1549                 :           0 :           abd_out_type = mid_type;
    1550                 :           0 :           vectype_out = mid_vectype;
    1551                 :             :         }
    1552                 :         740 :     }
    1553                 :             : 
    1554                 :        1384 :   if (ifn == IFN_ABD
    1555                 :        1384 :       && !direct_internal_fn_supported_p (ifn, vectype_in,
    1556                 :             :                                           OPTIMIZE_FOR_SPEED))
    1557                 :             :     return NULL;
    1558                 :             : 
    1559                 :           0 :   vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
    1560                 :             : 
    1561                 :           0 :   tree abd_oprnds[2];
    1562                 :           0 :   vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
    1563                 :             :                        abd_in_type, unprom, vectype_in);
    1564                 :             : 
    1565                 :           0 :   *type_out = get_vectype_for_scalar_type (vinfo, out_type);
    1566                 :             : 
    1567                 :           0 :   tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
    1568                 :           0 :   gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
    1569                 :             :                                                 abd_oprnds[0], abd_oprnds[1]);
    1570                 :           0 :   gimple_call_set_lhs (abd_stmt, abd_result);
    1571                 :           0 :   gimple_set_location (abd_stmt, gimple_location (last_stmt));
    1572                 :             : 
    1573                 :           0 :   gimple *stmt = abd_stmt;
    1574                 :           0 :   if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
    1575                 :           0 :       && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
    1576                 :           0 :       && !TYPE_UNSIGNED (abd_out_type))
    1577                 :             :     {
    1578                 :           0 :       tree unsign = unsigned_type_for (abd_out_type);
    1579                 :           0 :       stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
    1580                 :           0 :       vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
    1581                 :             :     }
    1582                 :             : 
    1583                 :           0 :   return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
    1584                 :             : }
    1585                 :             : 
    1586                 :             : /* Recognize an operation that performs ORIG_CODE on widened inputs,
    1587                 :             :    so that it can be treated as though it had the form:
    1588                 :             : 
    1589                 :             :       A_TYPE a;
    1590                 :             :       B_TYPE b;
    1591                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1592                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1593                 :             :     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    1594                 :             :     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    1595                 :             :     | RES_TYPE res = a_extend ORIG_CODE b_extend;
    1596                 :             : 
    1597                 :             :    Try to replace the pattern with:
    1598                 :             : 
    1599                 :             :       A_TYPE a;
    1600                 :             :       B_TYPE b;
    1601                 :             :       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
    1602                 :             :       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    1603                 :             :     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    1604                 :             :     | RES_TYPE res = (RES_TYPE) ext;  // possible no-op
    1605                 :             : 
    1606                 :             :    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
    1607                 :             : 
    1608                 :             :    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
    1609                 :             :    name of the pattern being matched, for dump purposes.  */
    1610                 :             : 
    1611                 :             : static gimple *
    1612                 :   100072914 : vect_recog_widen_op_pattern (vec_info *vinfo,
    1613                 :             :                              stmt_vec_info last_stmt_info, tree *type_out,
    1614                 :             :                              tree_code orig_code, code_helper wide_code,
    1615                 :             :                              bool shift_p, const char *name)
    1616                 :             : {
    1617                 :   100072914 :   gimple *last_stmt = last_stmt_info->stmt;
    1618                 :             : 
    1619                 :   300218742 :   vect_unpromoted_value unprom[2];
    1620                 :   100072914 :   tree half_type;
    1621                 :   100072914 :   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
    1622                 :             :                              shift_p, 2, unprom, &half_type))
    1623                 :             : 
    1624                 :             :     return NULL;
    1625                 :             : 
    1626                 :             :   /* Pattern detected.  */
    1627                 :      251482 :   vect_pattern_detected (name, last_stmt);
    1628                 :             : 
    1629                 :      251482 :   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
    1630                 :      251482 :   tree itype = type;
    1631                 :      251482 :   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
    1632                 :      251482 :       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    1633                 :      180956 :     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
    1634                 :      180956 :                                             TYPE_UNSIGNED (half_type));
    1635                 :             : 
    1636                 :             :   /* Check target support  */
    1637                 :      251482 :   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
    1638                 :      251482 :   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
    1639                 :      251482 :   tree ctype = itype;
    1640                 :      251482 :   tree vecctype = vecitype;
    1641                 :      251482 :   if (orig_code == MINUS_EXPR
    1642                 :        7606 :       && TYPE_UNSIGNED (itype)
    1643                 :      255060 :       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    1644                 :             :     {
    1645                 :             :       /* Subtraction is special, even if half_type is unsigned and no matter
    1646                 :             :          whether type is signed or unsigned, if type is wider than itype,
    1647                 :             :          we need to sign-extend from the widening operation result to the
    1648                 :             :          result type.
    1649                 :             :          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
    1650                 :             :          itype unsigned short and type either int or unsigned int.
    1651                 :             :          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
    1652                 :             :          (unsigned short) 0xffff, but for type int we want the result -1
    1653                 :             :          and for type unsigned int 0xffffffff rather than 0xffff.  */
    1654                 :         611 :       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
    1655                 :         611 :       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    1656                 :             :     }
    1657                 :             : 
    1658                 :      251482 :   code_helper dummy_code;
    1659                 :      251482 :   int dummy_int;
    1660                 :      251482 :   auto_vec<tree> dummy_vec;
    1661                 :      251482 :   if (!vectype
    1662                 :      251482 :       || !vecitype
    1663                 :      199710 :       || !vecctype
    1664                 :      451192 :       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
    1665                 :             :                                           vecitype, vectype,
    1666                 :             :                                           &dummy_code, &dummy_code,
    1667                 :             :                                           &dummy_int, &dummy_vec))
    1668                 :      171115 :     return NULL;
    1669                 :             : 
    1670                 :       80367 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    1671                 :       80367 :   if (!*type_out)
    1672                 :             :     return NULL;
    1673                 :             : 
    1674                 :       80367 :   tree oprnd[2];
    1675                 :       80367 :   vect_convert_inputs (vinfo, last_stmt_info,
    1676                 :             :                        2, oprnd, half_type, unprom, vectype);
    1677                 :             : 
    1678                 :       80367 :   tree var = vect_recog_temp_ssa_var (itype, NULL);
    1679                 :       80367 :   gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
    1680                 :             : 
    1681                 :       80367 :   if (vecctype != vecitype)
    1682                 :           0 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
    1683                 :             :                                         pattern_stmt, vecitype);
    1684                 :             : 
    1685                 :       80367 :   return vect_convert_output (vinfo, last_stmt_info,
    1686                 :       80367 :                               type, pattern_stmt, vecctype);
    1687                 :      251482 : }
    1688                 :             : 
    1689                 :             : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
    1690                 :             :    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
    1691                 :             : /* Thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged and the result of vect_recog_widen_op_pattern is returned as is.  */
    1692                 :             : static gimple *
    1693                 :    24979354 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1694                 :             :                                tree *type_out)
    1695                 :             : {
    1696                 :    24979354 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1697                 :             :                                       MULT_EXPR, WIDEN_MULT_EXPR, false, /* NOTE(review): the meaning of the false flag is defined by the callee, whose parameter list is not visible here.  */
    1698                 :    24979354 :                                       "vect_recog_widen_mult_pattern"); /* Presumably the pattern name used for dump output -- confirm in the callee.  */
    1699                 :             : }
    1700                 :             : 
    1701                 :             : /* Try to detect addition on widened inputs, converting PLUS_EXPR
    1702                 :             :    to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */
    1703                 :             : /* Thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged and the result of vect_recog_widen_op_pattern is returned as is.  */
    1704                 :             : static gimple *
    1705                 :    25065760 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1706                 :             :                                tree *type_out)
    1707                 :             : {
    1708                 :    25065760 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1709                 :             :                                       PLUS_EXPR, IFN_VEC_WIDEN_PLUS, /* Scalar code to match, then the widening internal function that replaces it.  */
    1710                 :    25065760 :                                       false, "vect_recog_widen_plus_pattern"); /* NOTE(review): the meaning of the false flag is defined by the callee; the string is presumably the name used for dump output.  */
    1711                 :             : }
    1712                 :             : 
    1713                 :             : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
    1714                 :             :    to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  Thin wrapper: VINFO, LAST_STMT_INFO and TYPE_OUT are forwarded unchanged and the result of vect_recog_widen_op_pattern is returned as is.  */
    1715                 :             : static gimple *
    1716                 :    25065760 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
    1717                 :             :                                tree *type_out)
    1718                 :             : {
    1719                 :    25065760 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    1720                 :             :                                       MINUS_EXPR, IFN_VEC_WIDEN_MINUS, /* Scalar code to match, then the widening internal function that replaces it.  */
    1721                 :    25065760 :                                       false, "vect_recog_widen_minus_pattern"); /* NOTE(review): the meaning of the false flag is defined by the callee; the string is presumably the name used for dump output.  */
    1722                 :             : }
    1723                 :             : 
    1724                 :             : /* Try to detect abd on widened inputs, converting IFN_ABD
    1725                 :             :    to IFN_VEC_WIDEN_ABD.  STMT_VINFO is where the search starts; its statement must be a conversion from an unsigned integral type to an integral type of exactly twice the precision.  On success *TYPE_OUT is set to the vector type for the conversion result type and a new IFN_VEC_WIDEN_ABD call is returned; otherwise NULL is returned.  */
    1726                 :             : static gimple *
    1727                 :    25065760 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1728                 :             :                               tree *type_out)
    1729                 :             : {
    1730                 :    25065760 :   gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
    1731                 :    23664148 :   if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
    1732                 :             :     return NULL;
    1733                 :             :   /* From here on LAST_STMT is an assignment whose rhs code is a conversion.  */
    1734                 :     2365310 :   tree last_rhs = gimple_assign_rhs1 (last_stmt);
    1735                 :             :   /* The conversion must go from an unsigned integral type to an integral type with exactly twice the precision.  */
    1736                 :     2365310 :   tree in_type = TREE_TYPE (last_rhs);
    1737                 :     2365310 :   tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    1738                 :     2365310 :   if (!INTEGRAL_TYPE_P (in_type)
    1739                 :     2112721 :       || !INTEGRAL_TYPE_P (out_type)
    1740                 :     2024100 :       || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
    1741                 :     2829423 :       || !TYPE_UNSIGNED (in_type))
    1742                 :             :     return NULL;
    1743                 :             :   /* OP is whatever vect_look_through_possible_promotion yields for LAST_RHS; reject it unless its precision equals that of IN_TYPE.  */
    1744                 :      144771 :   vect_unpromoted_value unprom;
    1745                 :      144771 :   tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
    1746                 :      144771 :   if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
    1747                 :             :     return NULL;
    1748                 :             :   /* OP must have a definition that vect_get_internal_def accepts.  */
    1749                 :      144228 :   stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
    1750                 :      144228 :   if (!abd_pattern_vinfo)
    1751                 :             :     return NULL;
    1752                 :             :   /* Switch to the statement returned by vect_stmt_to_vectorize for that definition and require it to be an internal call to IFN_ABD.  */
    1753                 :      131359 :   abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
    1754                 :    25074741 :   gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
    1755                 :        8981 :   if (!abd_stmt
    1756                 :        8981 :       || !gimple_call_internal_p (abd_stmt)
    1757                 :         226 :       || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
    1758                 :             :     return NULL;
    1759                 :             :   /* NOTE(review): neither vector type is tested for NULL before being handed to supportable_widening_operation, whereas the end of vect_recog_widen_op_pattern in this file tests its vector types first -- confirm the callee copes with NULL.  */
    1760                 :           0 :   tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
    1761                 :           0 :   tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
    1762                 :             :   /* Only the yes/no answer of the support query is used; whatever is written through the dummy_* pointers is never read.  */
    1763                 :           0 :   code_helper dummy_code;
    1764                 :           0 :   int dummy_int;
    1765                 :           0 :   auto_vec<tree> dummy_vec;
    1766                 :           0 :   if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
    1767                 :             :                                        vectype_out, vectype_in,
    1768                 :             :                                        &dummy_code, &dummy_code,
    1769                 :             :                                        &dummy_int, &dummy_vec))
    1770                 :             :     return NULL;
    1771                 :             :   /* All checks passed: report the match against LAST_STMT.  */
    1772                 :           0 :   vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
    1773                 :             :   /* The pattern result has the type of the conversion result.  */
    1774                 :           0 :   *type_out = vectype_out;
    1775                 :             :   /* Build WIDEN_ABD_RESULT = .VEC_WIDEN_ABD (arg0, arg1) from the two arguments of the IFN_ABD call, with a fresh temporary of OUT_TYPE as lhs and the location of LAST_STMT.  */
    1776                 :           0 :   tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
    1777                 :           0 :   tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
    1778                 :           0 :   tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
    1779                 :           0 :   gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
    1780                 :             :                                                       abd_oprnd0, abd_oprnd1);
    1781                 :           0 :   gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
    1782                 :           0 :   gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
    1783                 :           0 :   return widen_abd_stmt;
    1784                 :           0 : }
    1785                 :             : 
    1786                 :             : /* Function vect_recog_ctz_ffs_pattern
    1787                 :             : 
    1788                 :             :    Try to find the following pattern:
    1789                 :             : 
    1790                 :             :    TYPE1 A;
    1791                 :             :    TYPE1 B;
    1792                 :             : 
    1793                 :             :    B = __builtin_ctz{,l,ll} (A);
    1794                 :             : 
    1795                 :             :    or
    1796                 :             : 
    1797                 :             :    B = __builtin_ffs{,l,ll} (A);
    1798                 :             :    The rewrite is only done when the original function is not directly supported on the vector type of A but .CTZ (for ffs), .CLZ or .POPCOUNT is.
    1799                 :             :    Input:
    1800                 :             : 
    1801                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    1802                 :             :    Here it is the call B = __builtin_* (A) itself.
    1803                 :             : 
    1804                 :             :    Output:
    1805                 :             : 
    1806                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    1807                 :             : 
    1808                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    1809                 :             :    stmts that constitute the pattern, using .CLZ, .POPCOUNT or (for ffs only) .CTZ, or NULL if the pattern does not apply.  Helper stmts are queued with append_pattern_def_seq.  */
    1810                 :             : 
    1811                 :             : static gimple *
    1812                 :    24961948 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
    1813                 :             :                             tree *type_out)
    1814                 :             : {
    1815                 :    24961948 :   gimple *call_stmt = stmt_vinfo->stmt;
    1816                 :    24961948 :   gimple *pattern_stmt;
    1817                 :    24961948 :   tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
    1818                 :    24961948 :   tree new_var;
    1819                 :    24961948 :   internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST; /* IFN: function being replaced; IFNNEW: function chosen to replace it.  */
    1820                 :    24961948 :   bool defined_at_zero = true, defined_at_zero_new = false; /* Whether the original call, respectively the replacement, has a result for a zero argument that is relied on below.  */
    1821                 :    24961948 :   int val = 0, val_new = 0, val_cmp = 0; /* VAL: result of the original at zero; VAL_NEW: second argument given to the replacement call; VAL_CMP: what the emitted sequence yields at zero, compared against VAL at the end.  */
    1822                 :    24961948 :   int prec;
    1823                 :    24961948 :   int sub = 0, add = 0; /* If SUB is nonzero the result is SUB - call; else if ADD is nonzero it is call + ADD.  */
    1824                 :    24961948 :   location_t loc;
    1825                 :             :   /* Only calls with one or two arguments and a lhs are candidates.  */
    1826                 :    24961948 :   if (!is_gimple_call (call_stmt))
    1827                 :             :     return NULL;
    1828                 :             : 
    1829                 :     3079552 :   if (gimple_call_num_args (call_stmt) != 1
    1830                 :     3079552 :       && gimple_call_num_args (call_stmt) != 2)
    1831                 :             :     return NULL;
    1832                 :             : 
    1833                 :     1683202 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);
    1834                 :     1683202 :   rhs_type = TREE_TYPE (rhs_oprnd);
    1835                 :     1683202 :   lhs_oprnd = gimple_call_lhs (call_stmt);
    1836                 :     1683202 :   if (!lhs_oprnd)
    1837                 :             :     return NULL;
    1838                 :      851422 :   lhs_type = TREE_TYPE (lhs_oprnd); /* Both types must be integral, the argument type must satisfy type_has_mode_precision_p and the argument must be an SSA name.  */
    1839                 :      851422 :   if (!INTEGRAL_TYPE_P (lhs_type)
    1840                 :      277459 :       || !INTEGRAL_TYPE_P (rhs_type)
    1841                 :       42567 :       || !type_has_mode_precision_p (rhs_type)
    1842                 :      892559 :       || TREE_CODE (rhs_oprnd) != SSA_NAME)
    1843                 :      820556 :     return NULL;
    1844                 :             :   /* Classify the call.  For ctz a value at zero is only taken from the two-argument internal-function form; for ffs the initial values (DEFINED_AT_ZERO true, VAL 0) are kept.  */
    1845                 :       30866 :   switch (gimple_call_combined_fn (call_stmt))
    1846                 :             :     {
    1847                 :        1069 :     CASE_CFN_CTZ:
    1848                 :        1069 :       ifn = IFN_CTZ;
    1849                 :        1069 :       if (!gimple_call_internal_p (call_stmt)
    1850                 :        1069 :           || gimple_call_num_args (call_stmt) != 2)
    1851                 :             :         defined_at_zero = false;
    1852                 :             :       else
    1853                 :          48 :         val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    1854                 :             :       break;
    1855                 :             :     CASE_CFN_FFS:
    1856                 :             :       ifn = IFN_FFS;
    1857                 :             :       break;
    1858                 :             :     default:
    1859                 :             :       return NULL;
    1860                 :             :     }
    1861                 :             :   /* PREC is the precision of the argument type, used in the identities below.  */
    1862                 :        1190 :   prec = TYPE_PRECISION (rhs_type);
    1863                 :        1190 :   loc = gimple_location (call_stmt);
    1864                 :             :   /* Vector types are needed for both the result type and the argument type.  */
    1865                 :        1190 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    1866                 :        1190 :   if (!vec_type)
    1867                 :             :     return NULL;
    1868                 :             : 
    1869                 :        1190 :   vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1870                 :        1190 :   if (!vec_rhs_type)
    1871                 :             :     return NULL;
    1872                 :             : 
    1873                 :             :   /* Do it only if the backend doesn't have ctz<vector_mode>2 or
    1874                 :             :      ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
    1875                 :             :      popcount<vector_mode>2.  */
    1876                 :         970 :   if (!vec_type /* NOTE(review): VEC_TYPE was already tested for NULL above, so this operand looks redundant.  */
    1877                 :         970 :       || direct_internal_fn_supported_p (ifn, vec_rhs_type,
    1878                 :             :                                          OPTIMIZE_FOR_SPEED))
    1879                 :             :     return NULL;
    1880                 :             :   /* Choose the replacement: .CTZ first when replacing ffs, otherwise .CLZ.  The *_DEFINED_VALUE_AT_ZERO macros presumably also store the value at zero into VAL_NEW -- confirm against the target macro documentation.  */
    1881                 :         970 :   if (ifn == IFN_FFS
    1882                 :         970 :       && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
    1883                 :             :                                          OPTIMIZE_FOR_SPEED))
    1884                 :             :     {
    1885                 :           0 :       ifnnew = IFN_CTZ;
    1886                 :           0 :       defined_at_zero_new
    1887                 :           0 :         = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1888                 :             :                                      val_new) == 2;
    1889                 :             :     }
    1890                 :         970 :   else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
    1891                 :             :                                            OPTIMIZE_FOR_SPEED))
    1892                 :             :     {
    1893                 :          88 :       ifnnew = IFN_CLZ;
    1894                 :          88 :       defined_at_zero_new
    1895                 :         176 :         = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
    1896                 :             :                                      val_new) == 2;
    1897                 :             :     }
    1898                 :         970 :   if ((ifnnew == IFN_LAST /* Use .POPCOUNT when nothing was chosen yet, or when the original has a value at zero that the chosen replacement lacks, provided .POPCOUNT is supported.  */
    1899                 :          88 :        || (defined_at_zero && !defined_at_zero_new))
    1900                 :         970 :       && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
    1901                 :             :                                          OPTIMIZE_FOR_SPEED))
    1902                 :             :     {
    1903                 :             :       ifnnew = IFN_POPCOUNT;
    1904                 :             :       defined_at_zero_new = true;
    1905                 :             :       val_new = prec;
    1906                 :             :     }
    1907                 :         970 :   if (ifnnew == IFN_LAST) /* No usable replacement function.  */
    1908                 :             :     return NULL;
    1909                 :             : 
    1910                 :         124 :   vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt); /* NOTE(review): the string starts with vec_ while the function is named vect_recog_ctz_ffs_pattern -- confirm whether that is intended before changing it, since dump consumers may match on it.  */
    1911                 :             :   /* Default for the branches that do not override it (the .CTZ branch only increments it).  */
    1912                 :         124 :   val_cmp = val_new;
    1913                 :         124 :   if ((ifnnew == IFN_CLZ /* Use the (X - 1) & ~X form when .CLZ is used and both the original and .CLZ yield PREC at zero, or when .POPCOUNT replaces ctz.  */
    1914                 :         124 :        && defined_at_zero
    1915                 :          60 :        && defined_at_zero_new
    1916                 :          60 :        && val == prec
    1917                 :          31 :        && val_new == prec)
    1918                 :          93 :       || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    1919                 :             :     {
    1920                 :             :       /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
    1921                 :             :          .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
    1922                 :             :       if (ifnnew == IFN_CLZ)
    1923                 :             :         sub = prec;
    1924                 :          56 :       val_cmp = prec; /* At X == 0 the operand (X - 1) & ~X is all ones, so both identities give PREC.  */
    1925                 :             :       /* For a signed argument type, first convert X to the corresponding unsigned type; the remaining stmts then use that type.  This conversion stmt is not given LOC, unlike the stmts below.  */
    1926                 :          56 :       if (!TYPE_UNSIGNED (rhs_type))
    1927                 :             :         {
    1928                 :          12 :           rhs_type = unsigned_type_for (rhs_type);
    1929                 :          12 :           vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
    1930                 :          12 :           new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1931                 :          12 :           pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
    1932                 :          12 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
    1933                 :             :                                   vec_rhs_type);
    1934                 :          12 :           rhs_oprnd = new_var;
    1935                 :             :         }
    1936                 :             :       /* M1 = X + -1.  */
    1937                 :          56 :       tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
    1938                 :          56 :       pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
    1939                 :          56 :                                           build_int_cst (rhs_type, -1));
    1940                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1941                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1942                 :             :       /* NEW_VAR = ~X.  */
    1943                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1944                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
    1945                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1946                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1947                 :          56 :       rhs_oprnd = new_var;
    1948                 :             :       /* NEW_VAR = M1 & ~X, which becomes the operand of the replacement call.  */
    1949                 :          56 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1950                 :          56 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1951                 :             :                                           m1, rhs_oprnd);
    1952                 :          56 :       gimple_set_location (pattern_stmt, loc);
    1953                 :          56 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1954                 :          56 :       rhs_oprnd = new_var;
    1955                 :          56 :     }
    1956                 :          68 :   else if (ifnnew == IFN_CLZ)
    1957                 :             :     {
    1958                 :             :       /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
    1959                 :             :          .FFS (X) = PREC - .CLZ (X & -X).  */
    1960                 :          57 :       sub = prec - (ifn == IFN_CTZ);
    1961                 :          57 :       val_cmp = sub - val_new; /* At X == 0 the operand X & -X is 0, so the sequence gives SUB minus the value of .CLZ at zero.  */
    1962                 :             :       /* NEG = -X.  */
    1963                 :          57 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1964                 :          57 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1965                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1966                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1967                 :             :       /* NEW_VAR = X & NEG, which becomes the operand of the .CLZ call.  */
    1968                 :          57 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1969                 :          57 :       pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
    1970                 :             :                                           rhs_oprnd, neg);
    1971                 :          57 :       gimple_set_location (pattern_stmt, loc);
    1972                 :          57 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1973                 :          57 :       rhs_oprnd = new_var;
    1974                 :             :     }
    1975                 :          11 :   else if (ifnnew == IFN_POPCOUNT)
    1976                 :             :     {
    1977                 :             :       /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
    1978                 :             :          .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
    1979                 :          11 :       sub = prec + (ifn == IFN_FFS);
    1980                 :          11 :       val_cmp = sub; /* At X == 0 the operand X | -X is 0 and its popcount is 0, so the sequence gives SUB.  */
    1981                 :             :       /* NEG = -X.  */
    1982                 :          11 :       tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
    1983                 :          11 :       pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
    1984                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1985                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1986                 :             :       /* NEW_VAR = X | NEG, which becomes the operand of the .POPCOUNT call.  */
    1987                 :          11 :       new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
    1988                 :          11 :       pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
    1989                 :             :                                           rhs_oprnd, neg);
    1990                 :          11 :       gimple_set_location (pattern_stmt, loc);
    1991                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
    1992                 :          11 :       rhs_oprnd = new_var;
    1993                 :             :     }
    1994                 :           0 :   else if (ifnnew == IFN_CTZ) /* IFNNEW is only ever set to IFN_CTZ when IFN is IFN_FFS.  */
    1995                 :             :     {
    1996                 :             :       /* .FFS (X) = .CTZ (X) + 1.  */
    1997                 :           0 :       add = 1;
    1998                 :           0 :       val_cmp++;
    1999                 :             :     }
    2000                 :             : 
    2001                 :             :   /* Create B = .IFNNEW (A).  */
    2002                 :         124 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2003                 :         124 :   if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new) /* Use the two-argument form, passing VAL_NEW, when .CLZ or .CTZ has a value at zero.  */
    2004                 :          88 :     pattern_stmt
    2005                 :          88 :       = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
    2006                 :             :                                     build_int_cst (integer_type_node,
    2007                 :             :                                                    val_new));
    2008                 :             :   else
    2009                 :          36 :     pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
    2010                 :         124 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2011                 :         124 :   gimple_set_location (pattern_stmt, loc);
    2012                 :         124 :   *type_out = vec_type;
    2013                 :             :   /* Apply the arithmetic adjustment, if any.  The call built above is then queued as a helper stmt and the adjustment becomes the current PATTERN_STMT.  */
    2014                 :         124 :   if (sub)
    2015                 :             :     {
    2016                 :          99 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2017                 :          99 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2018                 :          99 :       pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
    2019                 :             :                                           build_int_cst (lhs_type, sub),
    2020                 :             :                                           new_var);
    2021                 :          99 :       gimple_set_location (pattern_stmt, loc);
    2022                 :          99 :       new_var = ret_var;
    2023                 :             :     }
    2024                 :          25 :   else if (add)
    2025                 :             :     {
    2026                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2027                 :           0 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2028                 :           0 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2029                 :             :                                           build_int_cst (lhs_type, add));
    2030                 :           0 :       gimple_set_location (pattern_stmt, loc);
    2031                 :           0 :       new_var = ret_var;
    2032                 :             :     }
    2033                 :             :   /* If the original has a value at zero that the sequence so far does not reproduce, select it explicitly: RET_VAR = A != 0 ? NEW_VAR : VAL.  The comparison uses the original call argument, re-read here because RHS_OPRND may have been overwritten above.  */
    2034                 :         124 :   if (defined_at_zero
    2035                 :          88 :       && (!defined_at_zero_new || val != val_cmp))
    2036                 :             :     {
    2037                 :          11 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2038                 :          11 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2039                 :          11 :       rhs_oprnd = gimple_call_arg (call_stmt, 0);
    2040                 :          11 :       rhs_type = TREE_TYPE (rhs_oprnd);
    2041                 :          11 :       tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
    2042                 :             :                              rhs_oprnd, build_zero_cst (rhs_type));
    2043                 :          11 :       pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
    2044                 :             :                                           new_var,
    2045                 :             :                                           build_int_cst (lhs_type, val));
    2046                 :             :     }
    2047                 :             :   /* The last stmt built is returned rather than queued; report it when dumping is enabled.  */
    2048                 :         124 :   if (dump_enabled_p ())
    2049                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    2050                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2051                 :             : 
    2052                 :             :   return pattern_stmt;
    2053                 :             : }
    2054                 :             : 
    2055                 :             : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
    2056                 :             : 
    2057                 :             :    Try to find the following pattern:
    2058                 :             : 
    2059                 :             :    UTYPE1 A;
    2060                 :             :    TYPE1 B;
    2061                 :             :    UTYPE2 temp_in;
    2062                 :             :    TYPE3 temp_out;
    2063                 :             :    temp_in = (UTYPE2)A;
    2064                 :             : 
    2065                 :             :    temp_out = __builtin_popcount{,l,ll} (temp_in);
    2066                 :             :    B = (TYPE1) temp_out;
    2067                 :             : 
    2068                 :             :    TYPE2 may or may not be equal to TYPE3,
    2069                 :             :    e.g. TYPE2 is equal to TYPE3 for __builtin_popcount
    2070                 :             :    but TYPE2 is not equal to TYPE3 for __builtin_popcountll.
    2071                 :             : 
    2072                 :             :    Input:
    2073                 :             : 
    2074                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2075                 :             :    here it starts with B = (TYPE1) temp_out;
    2076                 :             : 
    2077                 :             :    Output:
    2078                 :             : 
    2079                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2080                 :             : 
    2081                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2082                 :             :    stmts that constitute the pattern. In this case it will be:
    2083                 :             :    B = .POPCOUNT (A);
    2084                 :             : 
    2085                 :             :    Similarly for clz, ctz and ffs.
    2086                 :             : */
    2087                 :             : 
    2088                 :             : static gimple *
    2089                 :    24961818 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
    2090                 :             :                                          stmt_vec_info stmt_vinfo,
    2091                 :             :                                          tree *type_out)
    2092                 :             : {
    2093                 :    24961818 :   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
    2094                 :    24961818 :   gimple *call_stmt, *pattern_stmt;
    2095                 :    24961818 :   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
    2096                 :    24961818 :   internal_fn ifn = IFN_LAST;
    2097                 :    24961818 :   int addend = 0;  /* Added to the narrow CLZ result, see IFN_CLZ below.  */
    2098                 :             : 
    2099                 :             :   /* Find B = (TYPE1) temp_out; it must be a conversion to an integral type.  */
    2100                 :    24961818 :   if (!last_stmt)
    2101                 :             :     return NULL;
    2102                 :    17149203 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2103                 :    17149203 :   if (!CONVERT_EXPR_CODE_P (code))
    2104                 :             :     return NULL;
    2105                 :             : 
    2106                 :     2343885 :   lhs_oprnd = gimple_assign_lhs (last_stmt);
    2107                 :     2343885 :   lhs_type = TREE_TYPE (lhs_oprnd);
    2108                 :     2343885 :   if (!INTEGRAL_TYPE_P (lhs_type))
    2109                 :             :     return NULL;
    2110                 :             : 
    2111                 :     2199595 :   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
    2112                 :     2199595 :   if (TREE_CODE (rhs_oprnd) != SSA_NAME
    2113                 :     2199595 :       || !has_single_use (rhs_oprnd))  /* temp_out must have no other use.  */
    2114                 :             :     return NULL;
    2115                 :     1161521 :   call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
    2116                 :             : 
    2117                 :             :   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in) or a clz/ctz/ffs call.  */
    2118                 :     1161521 :   if (!is_gimple_call (call_stmt))
    2119                 :             :     return NULL;
    2120                 :       93905 :   switch (gimple_call_combined_fn (call_stmt))
    2121                 :             :     {
    2122                 :             :       int val;
    2123                 :             :     CASE_CFN_POPCOUNT:
    2124                 :             :       ifn = IFN_POPCOUNT;
    2125                 :             :       break;
    2126                 :        2303 :     CASE_CFN_CLZ:
    2127                 :        2303 :       ifn = IFN_CLZ;
    2128                 :             :       /* Punt if call result is unsigned and defined value at zero
    2129                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2130                 :        2303 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2131                 :           0 :           && gimple_call_internal_p (call_stmt)
    2132                 :        2303 :           && CLZ_DEFINED_VALUE_AT_ZERO
    2133                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2134                 :        2303 :           && val < 0)
    2135                 :             :         return NULL;
    2136                 :             :       break;
    2137                 :         523 :     CASE_CFN_CTZ:
    2138                 :         523 :       ifn = IFN_CTZ;
    2139                 :             :       /* Punt if call result is unsigned and defined value at zero
    2140                 :             :          is negative, as the negative value doesn't extend correctly.  */
    2141                 :         523 :       if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
    2142                 :           0 :           && gimple_call_internal_p (call_stmt)
    2143                 :         523 :           && CTZ_DEFINED_VALUE_AT_ZERO
    2144                 :             :                (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
    2145                 :         523 :           && val < 0)
    2146                 :             :         return NULL;
    2147                 :             :       break;
    2148                 :          18 :     CASE_CFN_FFS:
    2149                 :          18 :       ifn = IFN_FFS;
    2150                 :          18 :       break;
    2151                 :             :     default:
    2152                 :             :       return NULL;
    2153                 :             :     }
    2154                 :             : 
    2155                 :        3075 :   if (gimple_call_num_args (call_stmt) != 1
    2156                 :        3075 :       && gimple_call_num_args (call_stmt) != 2)  /* 2nd arg: see CLZ/CTZ below.  */
    2157                 :             :     return NULL;
    2158                 :             : 
    2159                 :        3075 :   rhs_oprnd = gimple_call_arg (call_stmt, 0);  /* temp_in */
    2160                 :        3075 :   vect_unpromoted_value unprom_diff;
    2161                 :        3075 :   rhs_origin
    2162                 :        3075 :     = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
    2163                 :             : 
    2164                 :        3075 :   if (!rhs_origin)
    2165                 :             :     return NULL;
    2166                 :             : 
    2167                 :             :   /* Input and output of .POPCOUNT should be same-precision integer.  */
    2168                 :        3075 :   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
    2169                 :             :     return NULL;
    2170                 :             : 
    2171                 :             :   /* Also A should be unsigned or same precision as temp_in, otherwise
    2172                 :             :      different builtins/internal functions have different behaviors.  */
    2173                 :        1268 :   if (TYPE_PRECISION (unprom_diff.type)
    2174                 :        1268 :       != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
    2175                 :         171 :     switch (ifn)
    2176                 :             :       {
    2177                 :          68 :       case IFN_POPCOUNT:
    2178                 :             :         /* For popcount require zero extension, which doesn't add any
    2179                 :             :            further bits to the count.  */
    2180                 :          68 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2181                 :             :           return NULL;
    2182                 :             :         break;
    2183                 :          85 :       case IFN_CLZ:
    2184                 :             :         /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
    2185                 :             :            if it is undefined at zero or if it matches also for the
    2186                 :             :            defined value there.  */
    2187                 :          85 :         if (!TYPE_UNSIGNED (unprom_diff.type))
    2188                 :             :           return NULL;
    2189                 :          85 :         if (!type_has_mode_precision_p (lhs_type)
    2190                 :          85 :             || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
    2191                 :           0 :           return NULL;
    2192                 :          85 :         addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
    2193                 :          85 :                   - TYPE_PRECISION (lhs_type));
    2194                 :          85 :         if (gimple_call_internal_p (call_stmt)
    2195                 :          85 :             && gimple_call_num_args (call_stmt) == 2)
    2196                 :             :           {
    2197                 :           0 :             int val1, val2;
    2198                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2199                 :           0 :             int d2
    2200                 :           0 :               = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2201                 :             :                                            val2);
    2202                 :           0 :             if (d2 != 2 || val1 != val2 + addend)
    2203                 :             :               return NULL;
    2204                 :             :           }
    2205                 :             :         break;
    2206                 :          13 :       case IFN_CTZ:
    2207                 :             :         /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
    2208                 :             :            if it is undefined at zero or if it matches also for the
    2209                 :             :            defined value there.  */
    2210                 :          13 :         if (gimple_call_internal_p (call_stmt)
    2211                 :          13 :             && gimple_call_num_args (call_stmt) == 2)
    2212                 :             :           {
    2213                 :           0 :             int val1, val2;
    2214                 :           0 :             val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
    2215                 :           0 :             int d2
    2216                 :           0 :               = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2217                 :             :                                            val2);
    2218                 :           0 :             if (d2 != 2 || val1 != val2)
    2219                 :             :               return NULL;
    2220                 :             :           }
    2221                 :             :         break;
    2222                 :             :       case IFN_FFS:
    2223                 :             :         /* ffsll (x) == ffs (x) for unsigned or signed x.  */
    2224                 :             :         break;
    2225                 :           0 :       default:
    2226                 :           0 :         gcc_unreachable ();
    2227                 :             :       }
    2228                 :             : 
    2229                 :        1268 :   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
    2230                 :             :   /* Do it only if the backend has popcount<vector_mode>2 etc. pattern.  */
    2231                 :        1268 :   if (!vec_type)
    2232                 :             :     return NULL;
    2233                 :             : 
    2234                 :        1153 :   bool supported
    2235                 :        1153 :     = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
    2236                 :        1153 :   if (!supported)  /* ffs and ctz may still be built from other functions.  */
    2237                 :        1067 :     switch (ifn)
    2238                 :             :       {
    2239                 :             :       case IFN_POPCOUNT:
    2240                 :             :       case IFN_CLZ:
    2241                 :             :         return NULL;
    2242                 :          18 :       case IFN_FFS:
    2243                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs using ctz.  */
    2244                 :          18 :         if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
    2245                 :             :                                             OPTIMIZE_FOR_SPEED))
    2246                 :             :           break;
    2247                 :             :         /* FALLTHRU */
    2248                 :         335 :       case IFN_CTZ:
    2249                 :             :         /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
    2250                 :             :            clz or popcount.  */
    2251                 :         335 :         if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
    2252                 :             :                                             OPTIMIZE_FOR_SPEED))
    2253                 :             :           break;
    2254                 :         301 :         if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
    2255                 :             :                                             OPTIMIZE_FOR_SPEED))
    2256                 :             :           break;
    2257                 :             :         return NULL;
    2258                 :           0 :       default:
    2259                 :           0 :         gcc_unreachable ();
    2260                 :             :       }
    2261                 :             : 
    2262                 :         142 :   vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
    2263                 :             :                          call_stmt);
    2264                 :             : 
    2265                 :             :   /* Create B = .POPCOUNT (A); VAL is passed as a second argument to CLZ/CTZ when C?Z_DEFINED_VALUE_AT_ZERO returns 2.  */
    2266                 :         142 :   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2267                 :         142 :   tree arg2 = NULL_TREE;
    2268                 :         142 :   int val;
    2269                 :         142 :   if (ifn == IFN_CLZ
    2270                 :         174 :       && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2271                 :             :                                     val) == 2)
    2272                 :          30 :     arg2 = build_int_cst (integer_type_node, val);
    2273                 :         112 :   else if (ifn == IFN_CTZ
    2274                 :         150 :            && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
    2275                 :             :                                          val) == 2)
    2276                 :          38 :     arg2 = build_int_cst (integer_type_node, val);
    2277                 :         142 :   if (arg2)
    2278                 :          68 :     pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
    2279                 :             :   else
    2280                 :          74 :     pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
    2281                 :         142 :   gimple_call_set_lhs (pattern_stmt, new_var);
    2282                 :         142 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    2283                 :         142 :   *type_out = vec_type;
    2284                 :             : 
    2285                 :         142 :   if (dump_enabled_p ())
    2286                 :           8 :     dump_printf_loc (MSG_NOTE, vect_location,
    2287                 :             :                      "created pattern stmt: %G", pattern_stmt);
    2288                 :             : 
    2289                 :         142 :   if (addend)  /* The call becomes a def stmt; the pattern stmt adds ADDEND.  */
    2290                 :             :     {
    2291                 :           6 :       gcc_assert (supported);
    2292                 :           6 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
    2293                 :           6 :       tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    2294                 :           6 :       pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
    2295                 :             :                                           build_int_cst (lhs_type, addend));
    2296                 :             :     }
    2297                 :         136 :   else if (!supported)  /* Let vect_recog_ctz_ffs_pattern lower the call.  */
    2298                 :             :     {
    2299                 :          56 :       stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
    2300                 :          56 :       STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
    2301                 :          56 :       pattern_stmt
    2302                 :          56 :         = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
    2303                 :          56 :       if (pattern_stmt == NULL)
    2304                 :             :         return NULL;
    2305                 :          56 :       if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
    2306                 :             :         {  /* Append the nested pattern's def sequence to STMT_VINFO's.  */
    2307                 :          56 :           gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    2308                 :          56 :           gimple_seq_add_seq_without_update (pseq, seq);
    2309                 :             :         }
    2310                 :             :     }
    2311                 :             :   return pattern_stmt;
    2312                 :             : }
    2313                 :             : 
    2314                 :             : /* Function vect_recog_pow_pattern
    2315                 :             : 
    2316                 :             :    Try to find the following pattern:
    2317                 :             : 
    2318                 :             :      x = POW (y, N);
    2319                 :             : 
    2320                 :             :    with POW being one of pow, powf, powi, powif and N being
    2321                 :             :    either 2 or 0.5.
    2322                 :             : 
    2323                 :             :    Input:
    2324                 :             : 
    2325                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2326                 :             : 
    2327                 :             :    Output:
    2328                 :             : 
    2329                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2330                 :             : 
    2331                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2332                 :             :    stmts that constitute the pattern. In this case it will be:
    2333                 :             :         x = y * y
    2334                 :             :    or
    2335                 :             :         x = sqrt (y)
    2336                 :             : */
    2337                 :             : 
    2338                 :             : static gimple *
    2339                 :    24961824 : vect_recog_pow_pattern (vec_info *vinfo,
    2340                 :             :                         stmt_vec_info stmt_vinfo, tree *type_out)
    2341                 :             : {
    2342                 :    24961824 :   gimple *last_stmt = stmt_vinfo->stmt;
    2343                 :    24961824 :   tree base, exp;
    2344                 :    24961824 :   gimple *stmt;
    2345                 :    24961824 :   tree var;
    2346                 :             : 
    2347                 :    24961824 :   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    2348                 :             :     return NULL;  /* Not a call, or a call without a result.  */
    2349                 :             : 
    2350                 :     1353869 :   switch (gimple_call_combined_fn (last_stmt))
    2351                 :             :     {
    2352                 :         364 :     CASE_CFN_POW:
    2353                 :         364 :     CASE_CFN_POWI:
    2354                 :         364 :       break;
    2355                 :             : 
    2356                 :             :     default:
    2357                 :             :       return NULL;
    2358                 :             :     }
    2359                 :             : 
    2360                 :         364 :   base = gimple_call_arg (last_stmt, 0);
    2361                 :         364 :   exp = gimple_call_arg (last_stmt, 1);
    2362                 :         364 :   if (TREE_CODE (exp) != REAL_CST
    2363                 :         341 :       && TREE_CODE (exp) != INTEGER_CST)
    2364                 :             :     {  /* Non-constant exponent: only pow (C, x) with constant C is handled.  */
    2365                 :         341 :       if (flag_unsafe_math_optimizations
    2366                 :          95 :           && TREE_CODE (base) == REAL_CST
    2367                 :         423 :           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
    2368                 :             :         {
    2369                 :          82 :           combined_fn log_cfn;
    2370                 :          82 :           built_in_function exp_bfn;
    2371                 :          82 :           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
    2372                 :             :             {
    2373                 :             :             case BUILT_IN_POW:
    2374                 :             :               log_cfn = CFN_BUILT_IN_LOG;
    2375                 :             :               exp_bfn = BUILT_IN_EXP;
    2376                 :             :               break;
    2377                 :          80 :             case BUILT_IN_POWF:
    2378                 :          80 :               log_cfn = CFN_BUILT_IN_LOGF;
    2379                 :          80 :               exp_bfn = BUILT_IN_EXPF;
    2380                 :          80 :               break;
    2381                 :           0 :             case BUILT_IN_POWL:
    2382                 :           0 :               log_cfn = CFN_BUILT_IN_LOGL;
    2383                 :           0 :               exp_bfn = BUILT_IN_EXPL;
    2384                 :           0 :               break;
    2385                 :             :             default:
    2386                 :             :               return NULL;
    2387                 :             :             }
    2388                 :          82 :           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
    2389                 :          82 :           tree exp_decl = builtin_decl_implicit (exp_bfn);
    2390                 :             :           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
    2391                 :             :              does that, but if C is a power of 2, we want to use
    2392                 :             :              exp2 (log2 (C) * x) in the non-vectorized version, but for
    2393                 :             :              vectorization we don't have vectorized exp2.  */
    2394                 :          82 :           if (logc
    2395                 :           2 :               && TREE_CODE (logc) == REAL_CST
    2396                 :           2 :               && exp_decl
    2397                 :          84 :               && lookup_attribute ("omp declare simd",
    2398                 :           2 :                                    DECL_ATTRIBUTES (exp_decl)))
    2399                 :             :             {
    2400                 :           2 :               cgraph_node *node = cgraph_node::get_create (exp_decl);
    2401                 :           2 :               if (node->simd_clones == NULL)  /* Try to create exp's SIMD clones.  */
    2402                 :             :                 {
    2403                 :           2 :                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
    2404                 :           2 :                       || node->definition)
    2405                 :             :                     return NULL;
    2406                 :           2 :                   expand_simd_clones (node);
    2407                 :           2 :                   if (node->simd_clones == NULL)
    2408                 :             :                     return NULL;
    2409                 :             :                 }
    2410                 :           2 :               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2411                 :           2 :               if (!*type_out)
    2412                 :             :                 return NULL;
    2413                 :           2 :               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2414                 :           2 :               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);  /* def = x * log (C) */
    2415                 :           2 :               append_pattern_def_seq (vinfo, stmt_vinfo, g);
    2416                 :           2 :               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2417                 :           2 :               g = gimple_build_call (exp_decl, 1, def);  /* res = exp (def) */
    2418                 :           2 :               gimple_call_set_lhs (g, res);
    2419                 :           2 :               return g;
    2420                 :             :             }
    2421                 :             :         }
    2422                 :             : 
    2423                 :         339 :       return NULL;
    2424                 :             :     }
    2425                 :             : 
    2426                 :             :   /* We now have a pow or powi builtin function call with a constant
    2427                 :             :      exponent.  */
    2428                 :             : 
    2429                 :             :   /* Catch squaring: an exponent of 2 becomes BASE * BASE.  */
    2430                 :          23 :   if ((tree_fits_shwi_p (exp)
    2431                 :           0 :        && tree_to_shwi (exp) == 2)
    2432                 :          23 :       || (TREE_CODE (exp) == REAL_CST
    2433                 :          23 :           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    2434                 :             :     {
    2435                 :           0 :       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
    2436                 :           0 :                                             TREE_TYPE (base), type_out))
    2437                 :             :         return NULL;
    2438                 :             : 
    2439                 :           0 :       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
    2440                 :           0 :       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
    2441                 :           0 :       return stmt;
    2442                 :             :     }
    2443                 :             : 
    2444                 :             :   /* Catch square root: an exponent of 0.5 becomes .SQRT (BASE) if supported.  */
    2445                 :          23 :   if (TREE_CODE (exp) == REAL_CST
    2446                 :          23 :       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    2447                 :             :     {
    2448                 :           8 :       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
    2449                 :           8 :       if (*type_out
    2450                 :           8 :           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
    2451                 :             :                                              OPTIMIZE_FOR_SPEED))
    2452                 :             :         {
    2453                 :           6 :           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
    2454                 :           6 :           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
    2455                 :           6 :           gimple_call_set_lhs (stmt, var);
    2456                 :           6 :           gimple_call_set_nothrow (stmt, true);
    2457                 :           6 :           return stmt;
    2458                 :             :         }
    2459                 :             :     }
    2460                 :             : 
    2461                 :             :   return NULL;
    2462                 :             : }
    2463                 :             : 
    2464                 :             : 
    2465                 :             : /* Function vect_recog_widen_sum_pattern
    2466                 :             : 
    2467                 :             :    Try to find the following pattern:
    2468                 :             : 
    2469                 :             :      type x_t;
    2470                 :             :      TYPE x_T, sum = init;
    2471                 :             :    loop:
    2472                 :             :      sum_0 = phi <init, sum_1>
    2473                 :             :      S1  x_t = *p;
    2474                 :             :      S2  x_T = (TYPE) x_t;
    2475                 :             :      S3  sum_1 = x_T + sum_0;
    2476                 :             : 
    2477                 :             :    where type 'TYPE' is at least double the size of type 'type', i.e - we're
    2478                 :             :    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
    2479                 :             :    a special case of a reduction computation.
    2480                 :             : 
    2481                 :             :    Input:
    2482                 :             : 
    2483                 :             :    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
    2484                 :             :    when this function is called with S3, the pattern {S2,S3} will be detected.
    2485                 :             : 
    2486                 :             :    Output:
    2487                 :             : 
    2488                 :             :    * TYPE_OUT: The type of the output of this pattern.
    2489                 :             : 
    2490                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2491                 :             :    stmts that constitute the pattern. In this case it will be:
    2492                 :             :         WIDEN_SUM <x_t, sum_0>
    2493                 :             : 
    2494                 :             :    Note: The widening-sum idiom is a widening reduction pattern that is
    2495                 :             :          vectorized without preserving all the intermediate results. It
    2496                 :             :          produces only N/2 (widened) results (by summing up pairs of
    2497                 :             :          intermediate results) rather than all N results.  Therefore, we
    2498                 :             :          cannot allow this pattern when we want to get all the results and in
    2499                 :             :          the correct order (as is the case when this computation is in an
    2500                 :             :          inner-loop nested in an outer-loop that is being vectorized).  */
    2501                 :             : 
    2502                 :             : static gimple *
    2503                 :    24961824 : vect_recog_widen_sum_pattern (vec_info *vinfo,
    2504                 :             :                               stmt_vec_info stmt_vinfo, tree *type_out)
    2505                 :             : {
    2506                 :    24961824 :   gimple *last_stmt = stmt_vinfo->stmt;
    2507                 :    24961824 :   tree oprnd0, oprnd1;
    2508                 :    24961824 :   tree type;
    2509                 :    24961824 :   gimple *pattern_stmt;
    2510                 :    24961824 :   tree var;
    2511                 :             : 
    2512                 :             :   /* Look for the following pattern
    2513                 :             :           DX = (TYPE) X;
    2514                 :             :           sum_1 = DX + sum_0;
    2515                 :             :      In which DX is at least double the size of X, and sum_1 has been
    2516                 :             :      recognized as a reduction variable.
    2517                 :             :    */
    2518                 :             : 
    2519                 :             :   /* Starting from LAST_STMT, follow the defs of its uses in search
    2520                 :             :      of the above pattern.  */
    2521                 :             : 
    2522                 :    24961824 :   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
    2523                 :             :                                        &oprnd0, &oprnd1)
    2524                 :       33436 :       || TREE_CODE (oprnd0) != SSA_NAME
    2525                 :    24995089 :       || !vinfo->lookup_def (oprnd0))
    2526                 :    24928609 :     return NULL;
    2527                 :             : 
    2528                 :       33215 :   type = TREE_TYPE (gimple_get_lhs (last_stmt));
    2529                 :             : 
    2530                 :             :   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
    2531                 :             :      we know that oprnd1 is the reduction variable (defined by a loop-header
    2532                 :             :      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
    2533                 :             :      Left to check that oprnd0 is a promotion of a value whose type has at
    2534                 :             :      most half the precision of TYPE (the type of the sum).  */
    2535                 :             : 
    2536                 :       33215 :   vect_unpromoted_value unprom0;
    2537                 :       33215 :   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
    2538                 :       33215 :       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    2539                 :             :     return NULL;
    2540                 :             : 
    2541                 :        1555 :   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
    2542                 :             : 
    2543                 :        1555 :   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
    2544                 :             :                                         unprom0.type, type_out))
    2545                 :             :     return NULL;
    2546                 :             : 
    2547                 :           0 :   var = vect_recog_temp_ssa_var (type, NULL);
    2548                 :           0 :   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
    2549                 :             : 
    2550                 :           0 :   return pattern_stmt;
    2551                 :             : }
    2552                 :             : 
    2553                 :             : /* Function vect_recog_bitfield_ref_pattern
    2554                 :             : 
    2555                 :             :    Try to find the following pattern:
    2556                 :             : 
    2557                 :             :    bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
    2558                 :             :    result = (type_out) bf_value;
    2559                 :             : 
    2560                 :             :    or
    2561                 :             : 
    2562                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2563                 :             : 
    2564                 :             :    where type_out is a non-bitfield type, that is to say, its precision matches
    2565                 :             :    2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
    2566                 :             : 
    2567                 :             :    Input:
    2568                 :             : 
    2569                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    2570                 :             :    here it starts with:
    2571                 :             :    result = (type_out) bf_value;
    2572                 :             : 
    2573                 :             :    or
    2574                 :             : 
    2575                 :             :    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
    2576                 :             : 
    2577                 :             :    Output:
    2578                 :             : 
    2579                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2580                 :             : 
    2581                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2582                 :             :    stmts that constitute the pattern. If the precision of type_out is bigger
    2583                 :             :    than the precision type of _1 we perform the widening before the shifting,
    2584                 :             :    since the new precision will be large enough to shift the value and moving
    2585                 :             :    widening operations up the statement chain enables the generation of
    2586                 :             :    widening loads.  If we are widening and the operation after the pattern is
    2587                 :             :    an addition then we mask first and shift later, to enable the generation of
    2588                 :             :    shifting adds.  In the case of narrowing we will always mask first, shift
    2589                 :             :    last and then perform a narrowing operation.  This will enable the
    2590                 :             :    generation of narrowing shifts.
    2591                 :             : 
    2592                 :             :    Widening with mask first, shift later:
    2593                 :             :    container = (type_out) container;
    2594                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2595                 :             :    result = masked >> bitpos;
    2596                 :             : 
    2597                 :             :    Widening with shift first, mask last:
    2598                 :             :    container = (type_out) container;
    2599                 :             :    shifted = container >> bitpos;
    2600                 :             :    result = shifted & ((1 << bitsize) - 1);
    2601                 :             : 
    2602                 :             :    Narrowing:
    2603                 :             :    masked = container & (((1 << bitsize) - 1) << bitpos);
    2604                 :             :    result = masked >> bitpos;
    2605                 :             :    result = (type_out) result;
    2606                 :             : 
    2607                 :             :    If the bitfield is signed and type_out is wider than the bitfield, we
    2608                 :             :    need to keep the result sign-extended:
    2609                 :             :    container = (type) container;
    2610                 :             :    masked = container << (prec - bitsize - bitpos);
    2611                 :             :    result = (type_out) (masked >> (prec - bitsize));
    2612                 :             : 
    2613                 :             :    Here type is the signed variant of the wider of type_out and the type
    2614                 :             :    of container.
    2615                 :             : 
    2616                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2617                 :             : 
    2618                 :             :    When the original bitfield was inside a gcond then a new gcond is also
    2619                 :             :    generated with the new `result` as the operand to the comparison.
    2620                 :             : 
    2621                 :             : */
    2622                 :             : 
    2623                 :             : static gimple *
    2624                 :    24910980 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2625                 :             :                                  tree *type_out)
    2626                 :             : {
    2627                 :    24910980 :   gimple *bf_stmt = NULL;
    2628                 :    24910980 :   tree lhs = NULL_TREE;
    2629                 :    24910980 :   tree ret_type = NULL_TREE;
    2630                 :    24910980 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    2631                 :    24910980 :   if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
    2632                 :             :     {
    2633                 :     3913811 :       tree op = gimple_cond_lhs (cond_stmt);
    2634                 :     3913811 :       if (TREE_CODE (op) != SSA_NAME)
    2635                 :             :         return NULL;
    2636                 :     3860028 :       bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
    2637                 :     3860028 :       if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
    2638                 :             :         return NULL;
    2639                 :             :     }
    2640                 :    41994338 :   else if (is_gimple_assign (stmt)
    2641                 :    17098120 :            && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
    2642                 :    23275805 :            && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    2643                 :             :     {
    2644                 :     2238022 :       gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    2645                 :     2238022 :       bf_stmt = dyn_cast <gassign *> (second_stmt);
    2646                 :     2238022 :       lhs = gimple_assign_lhs (stmt);
    2647                 :     2238022 :       ret_type = TREE_TYPE (lhs);
    2648                 :             :     }
    2649                 :             : 
    2650                 :    23598490 :   if (!bf_stmt
    2651                 :    23598490 :       || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
    2652                 :             :     return NULL;
    2653                 :             : 
    2654                 :       12555 :   tree bf_ref = gimple_assign_rhs1 (bf_stmt);
    2655                 :       12555 :   tree container = TREE_OPERAND (bf_ref, 0);
    2656                 :       12555 :   ret_type = ret_type ? ret_type : TREE_TYPE (container);  /* gcond case: no conversion stmt, so fall back to the container's type.  */
    2657                 :             : 
    2658                 :       12555 :   if (!bit_field_offset (bf_ref).is_constant ()
    2659                 :       12555 :       || !bit_field_size (bf_ref).is_constant ()
    2660                 :       12555 :       || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
    2661                 :           0 :     return NULL;
    2662                 :             : 
    2663                 :       24894 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
    2664                 :       12553 :       || !INTEGRAL_TYPE_P (TREE_TYPE (container))
    2665                 :       14037 :       || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
    2666                 :       11073 :     return NULL;
    2667                 :             : 
    2668                 :        1482 :   gimple *use_stmt, *pattern_stmt;
    2669                 :        1482 :   use_operand_p use_p;
    2670                 :        1482 :   bool shift_first = true;
    2671                 :        1482 :   tree container_type = TREE_TYPE (container);
    2672                 :        1482 :   tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2673                 :             : 
    2674                 :             :   /* Calculate shift_n before the adjustments for widening loads, otherwise
    2675                 :             :      the container may change and we have to consider offset change for
    2676                 :             :      widening loads on big endianness.  The shift_n calculated here can be
    2677                 :             :      independent of widening.  */
    2678                 :        1482 :   unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
    2679                 :        1482 :   unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
    2680                 :        1482 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2681                 :        1482 :   if (BYTES_BIG_ENDIAN)
    2682                 :             :     shift_n = prec - shift_n - mask_width;
    2683                 :             : 
    2684                 :        1482 :   bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
    2685                 :        1107 :                    TYPE_PRECISION (ret_type) > mask_width);
    2686                 :        1482 :   bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
    2687                 :        1482 :                      TYPE_PRECISION (ret_type));
    2688                 :             : 
    2689                 :             :   /* We move the conversion earlier if the loaded type is smaller than the
    2690                 :             :      return type to enable the use of widening loads.  And if we need a
    2691                 :             :      sign extension, we need to convert the loaded value early to a signed
    2692                 :             :      type as well.  */
    2693                 :        1482 :   if (ref_sext || load_widen)
    2694                 :             :     {
    2695                 :         681 :       tree type = load_widen ? ret_type : container_type;
    2696                 :         681 :       if (ref_sext)
    2697                 :         653 :         type = gimple_signed_type (type);
    2698                 :         681 :       pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
    2699                 :             :                                           NOP_EXPR, container);
    2700                 :         681 :       container = gimple_get_lhs (pattern_stmt);
    2701                 :         681 :       container_type = TREE_TYPE (container);
    2702                 :         681 :       prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2703                 :         681 :       vectype = get_vectype_for_scalar_type (vinfo, container_type);
    2704                 :         681 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2705                 :             :     }
    2706                 :         801 :   else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
    2707                 :             :     /* If we are doing the conversion last then also delay the shift as we may
    2708                 :             :        be able to combine the shift and conversion in certain cases.  */
    2709                 :             :     shift_first = false;
    2710                 :             : 
    2711                 :             :   /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
    2712                 :             :      PLUS_EXPR then do the shift last as some targets can combine the shift and
    2713                 :             :      add into a single instruction.  */
    2714                 :        1482 :   if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
    2715                 :             :     {
    2716                 :         719 :       if (gimple_code (use_stmt) == GIMPLE_ASSIGN
    2717                 :         719 :           && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
    2718                 :             :         shift_first = false;
    2719                 :             :     }
    2720                 :             : 
    2721                 :             :   /* If we don't have to shift we only generate the mask, so just fix the
    2722                 :             :      code-path to shift_first.  */
    2723                 :        1482 :   if (shift_n == 0)
    2724                 :         612 :     shift_first = true;
    2725                 :             : 
    2726                 :        1482 :   tree result;
    2727                 :        1482 :   if (shift_first && !ref_sext)
    2728                 :             :     {
    2729                 :         408 :       tree shifted = container;
    2730                 :         408 :       if (shift_n)
    2731                 :             :         {
    2732                 :          48 :           pattern_stmt
    2733                 :          48 :             = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2734                 :             :                                    RSHIFT_EXPR, container,
    2735                 :             :                                    build_int_cst (sizetype, shift_n));
    2736                 :          48 :           shifted = gimple_assign_lhs (pattern_stmt);
    2737                 :          48 :           append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2738                 :             :         }
    2739                 :             : 
    2740                 :         408 :       tree mask = wide_int_to_tree (container_type,
    2741                 :         408 :                                     wi::mask (mask_width, false, prec));
    2742                 :             : 
    2743                 :         408 :       pattern_stmt
    2744                 :         408 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2745                 :             :                                BIT_AND_EXPR, shifted, mask);
    2746                 :         408 :       result = gimple_assign_lhs (pattern_stmt);
    2747                 :             :     }
    2748                 :             :   else
    2749                 :             :     {
    2750                 :        1074 :       tree temp = vect_recog_temp_ssa_var (container_type);
    2751                 :        1074 :       if (!ref_sext)
    2752                 :             :         {
    2753                 :         421 :           tree mask = wide_int_to_tree (container_type,
    2754                 :         421 :                                         wi::shifted_mask (shift_n,
    2755                 :             :                                                           mask_width,
    2756                 :             :                                                           false, prec));
    2757                 :         421 :           pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
    2758                 :             :                                               container, mask);
    2759                 :             :         }
    2760                 :             :       else
    2761                 :             :         {
    2762                 :         653 :           HOST_WIDE_INT shl = prec - shift_n - mask_width;  /* Left shift that moves the field's top bit to bit PREC - 1.  */
    2763                 :         653 :           shift_n += shl;  /* Now PREC - MASK_WIDTH: the right-shift amount used below.  */
    2764                 :         653 :           pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
    2765                 :             :                                               container,
    2766                 :             :                                               build_int_cst (sizetype,
    2767                 :             :                                                              shl));
    2768                 :             :         }
    2769                 :             : 
    2770                 :        1074 :       tree masked = gimple_assign_lhs (pattern_stmt);
    2771                 :        1074 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2772                 :        1074 :       pattern_stmt
    2773                 :        1074 :         = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2774                 :             :                                RSHIFT_EXPR, masked,
    2775                 :             :                                build_int_cst (sizetype, shift_n));
    2776                 :        1074 :       result = gimple_assign_lhs (pattern_stmt);
    2777                 :             :     }
    2778                 :             : 
    2779                 :        1482 :   if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
    2780                 :             :     {
    2781                 :         869 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2782                 :         869 :       pattern_stmt
    2783                 :         869 :         = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
    2784                 :             :                                NOP_EXPR, result);
    2785                 :             :     }
    2786                 :             : 
    2787                 :        1482 :   if (!lhs)
    2788                 :             :     {
    2789                 :         536 :       if (!vectype)
    2790                 :             :         return NULL;  /* No vector type was found for the container type.  */
    2791                 :             : 
    2792                 :         428 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
    2793                 :         428 :       vectype = truth_type_for (vectype);
    2794                 :             : 
    2795                 :             :       /* FIXME: This part extracts the boolean value out of the bitfield in the
    2796                 :             :                 same way as vect_recog_gcond_pattern does.  However because
    2797                 :             :                 patterns cannot match the same root twice, when we handle and
    2798                 :             :                 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
    2799                 :             :                 apply anymore.  We should really fix it so that we don't need to
    2800                 :             :                 duplicate transformations like these.  */
    2801                 :         428 :       tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    2802                 :         428 :       gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
    2803                 :         428 :       tree cond_cst = gimple_cond_rhs (cond_stmt);
    2804                 :         428 :       gimple *new_stmt
    2805                 :         428 :         = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
    2806                 :             :                                gimple_get_lhs (pattern_stmt),
    2807                 :             :                                fold_convert (container_type, cond_cst));
    2808                 :         428 :       append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
    2809                 :         428 :       pattern_stmt
    2810                 :         428 :         = gimple_build_cond (NE_EXPR, new_lhs,
    2811                 :         428 :                              build_zero_cst (TREE_TYPE (new_lhs)),
    2812                 :             :                              NULL_TREE, NULL_TREE);
    2813                 :             :     }
    2814                 :             : 
    2815                 :        1374 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2816                 :        1374 :   vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
    2817                 :             : 
    2818                 :        1374 :   return pattern_stmt;
    2819                 :             : }
    2820                 :             : 
    2821                 :             : /* Function vect_recog_bit_insert_pattern
    2822                 :             : 
    2823                 :             :    Try to find the following pattern:
    2824                 :             : 
    2825                 :             :    written = BIT_INSERT_EXPR (container, value, bitpos);
    2826                 :             : 
    2827                 :             :    Input:
    2828                 :             : 
    2829                 :             :    * STMT_VINFO: The stmt we want to replace.
    2830                 :             : 
    2831                 :             :    Output:
    2832                 :             : 
    2833                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    2834                 :             : 
    2835                 :             :    * Return value: A new stmt that will be used to replace the sequence of
    2836                 :             :    stmts that constitute the pattern. In this case it will be:
    2837                 :             :    value = (container_type) value;          // Make sure value has container_type.
    2838                 :             :    shifted = value << bitpos;               // Shift value into place
    2839                 :             :    masked = shifted & (mask << bitpos);     // Mask off the non-relevant bits in
    2840                 :             :                                             // the 'to-write value'.
    2841                 :             :    cleared = container & ~(mask << bitpos); // Clearing the bits we want to
    2842                 :             :                                             // write to from the value we want
    2843                 :             :                                             // to write to.
    2844                 :             :    written = cleared | masked;              // Write bits.
    2845                 :             : 
    2846                 :             : 
    2847                 :             :    where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
    2848                 :             :    bits corresponding to the real size of the bitfield value we are writing to.
    2849                 :             :    The shifting is always optional depending on whether bitpos != 0.
    2850                 :             : 
    2851                 :             : */
    2852                 :             : 
    2853                 :             : static gimple *
    2854                 :    24912925 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
    2855                 :             :                                tree *type_out)
    2856                 :             : {
    2857                 :    24912925 :   gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
    2858                 :    23315176 :   if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
    2859                 :             :     return NULL;
    2860                 :             : 
    2861                 :        1278 :   tree container = gimple_assign_rhs1 (bf_stmt);
    2862                 :        1278 :   tree value = gimple_assign_rhs2 (bf_stmt);
    2863                 :        1278 :   tree shift = gimple_assign_rhs3 (bf_stmt);
    2864                 :             : 
    2865                 :        1278 :   tree bf_type = TREE_TYPE (value);
    2866                 :        1278 :   tree container_type = TREE_TYPE (container);
    2867                 :             : 
    2868                 :        1278 :   if (!INTEGRAL_TYPE_P (container_type)
    2869                 :        1278 :       || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
    2870                 :             :     return NULL;
    2871                 :             : 
    2872                 :         650 :   gimple *pattern_stmt;
    2873                 :             : 
    2874                 :         650 :   vect_unpromoted_value unprom;
    2875                 :         650 :   unprom.set_op (value, vect_internal_def);
    2876                 :         650 :   value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
    2877                 :             :                               get_vectype_for_scalar_type (vinfo,
    2878                 :             :                                                            container_type));
    2879                 :             : 
    2880                 :         650 :   unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);  /* Number of bits being inserted (precision of VALUE's original type).  */
    2881                 :         650 :   unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
    2882                 :         650 :   unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
    2883                 :         650 :   if (BYTES_BIG_ENDIAN)
    2884                 :             :     {
    2885                 :             :       shift_n = prec - shift_n - mask_width;
    2886                 :             :       shift = build_int_cst (TREE_TYPE (shift), shift_n);
    2887                 :             :     }
    2888                 :             : 
    2889                 :         650 :   if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
    2890                 :             :     {
    2891                 :           0 :       pattern_stmt =
    2892                 :           0 :         gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2893                 :             :                              NOP_EXPR, value);
    2894                 :           0 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2895                 :           0 :       value = gimple_get_lhs (pattern_stmt);
    2896                 :             :     }
    2897                 :             : 
    2898                 :             :   /* Shift VALUE into place.  */
    2899                 :         650 :   tree shifted = value;
    2900                 :         650 :   if (shift_n)
    2901                 :             :     {
    2902                 :         360 :       gimple_seq stmts = NULL;
    2903                 :         360 :       shifted
    2904                 :         360 :         = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
    2905                 :         360 :       if (!gimple_seq_empty_p (stmts))
    2906                 :         112 :         append_pattern_def_seq (vinfo, stmt_info,
    2907                 :             :                                 gimple_seq_first_stmt (stmts));
    2908                 :             :     }
    2909                 :             : 
    2910                 :         650 :   tree mask_t
    2911                 :         650 :     = wide_int_to_tree (container_type,
    2912                 :         650 :                         wi::shifted_mask (shift_n, mask_width, false, prec));
    2913                 :             : 
    2914                 :             :   /* Clear bits we don't want to write back from SHIFTED.  */
    2915                 :         650 :   gimple_seq stmts = NULL;
    2916                 :         650 :   tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
    2917                 :             :                               mask_t);
    2918                 :         650 :   if (!gimple_seq_empty_p (stmts))
    2919                 :             :     {
    2920                 :         110 :       pattern_stmt = gimple_seq_first_stmt (stmts);
    2921                 :         110 :       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2922                 :             :     }
    2923                 :             : 
    2924                 :             :   /* Clear the bits in CONTAINER that we are about to write to.  */
    2925                 :         650 :   mask_t = wide_int_to_tree (container_type,
    2926                 :         650 :                              wi::shifted_mask (shift_n, mask_width, true, prec));
    2927                 :         650 :   tree cleared = vect_recog_temp_ssa_var (container_type);
    2928                 :         650 :   pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
    2929                 :         650 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
    2930                 :             : 
    2931                 :             :   /* Write MASKED into CLEARED.  */
    2932                 :         650 :   pattern_stmt
    2933                 :         650 :     = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
    2934                 :             :                            BIT_IOR_EXPR, cleared, masked);
    2935                 :             : 
    2936                 :         650 :   *type_out = STMT_VINFO_VECTYPE (stmt_info);
    2937                 :         650 :   vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
    2938                 :             : 
    2939                 :         650 :   return pattern_stmt;
    2940                 :             : }
    2941                 :             : 
    2942                 :             : 
    2943                 :             : /* Recognize cases in which an operation is performed in one type WTYPE
    2944                 :             :    but could be done more efficiently in a narrower type NTYPE.  For example,
    2945                 :             :    if we have:
    2946                 :             : 
    2947                 :             :      ATYPE a;  // narrower than NTYPE
    2948                 :             :      BTYPE b;  // narrower than NTYPE
    2949                 :             :      WTYPE aw = (WTYPE) a;
    2950                 :             :      WTYPE bw = (WTYPE) b;
    2951                 :             :      WTYPE res = aw + bw;  // only uses of aw and bw
    2952                 :             : 
    2953                 :             :    then it would be more efficient to do:
    2954                 :             : 
    2955                 :             :      NTYPE an = (NTYPE) a;
    2956                 :             :      NTYPE bn = (NTYPE) b;
    2957                 :             :      NTYPE resn = an + bn;
    2958                 :             :      WTYPE res = (WTYPE) resn;
    2959                 :             : 
    2960                 :             :    Other situations include things like:
    2961                 :             : 
    2962                 :             :      ATYPE a;  // NTYPE or narrower
    2963                 :             :      WTYPE aw = (WTYPE) a;
    2964                 :             :      WTYPE res = aw + b;
    2965                 :             : 
    2966                 :             :    when only "(NTYPE) res" is significant.  In that case it's more efficient
    2967                 :             :    to truncate "b" and do the operation on NTYPE instead:
    2968                 :             : 
    2969                 :             :      NTYPE an = (NTYPE) a;
    2970                 :             :      NTYPE bn = (NTYPE) b;  // truncation
    2971                 :             :      NTYPE resn = an + bn;
    2972                 :             :      WTYPE res = (WTYPE) resn;
    2973                 :             : 
    2974                 :             :    All users of "res" should then use "resn" instead, making the final
    2975                 :             :    statement dead (not marked as relevant).  The final statement is still
    2976                 :             :    needed to maintain the type correctness of the IR.
    2977                 :             : 
    2978                 :             :    vect_determine_precisions has already determined the minimum
    2979                 :             :    precision of the operation and the minimum precision required
    2980                 :             :    by users of the result.  */
    2981                 :             : 
    2982                 :             : static gimple *
    2983                 :    24913365 : vect_recog_over_widening_pattern (vec_info *vinfo,
    2984                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    2985                 :             : {
    2986                 :    24913365 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    2987                 :    17100933 :   if (!last_stmt)
    2988                 :             :     return NULL;
    2989                 :             : 
    2990                 :             :   /* See whether we have found that this operation can be done on a
    2991                 :             :      narrower type without changing its semantics.  */
    2992                 :    17100933 :   unsigned int new_precision = last_stmt_info->operation_precision;
    2993                 :    17100933 :   if (!new_precision)
    2994                 :             :     return NULL;
    2995                 :             : 
    2996                 :      950952 :   tree lhs = gimple_assign_lhs (last_stmt);
    2997                 :      950952 :   tree type = TREE_TYPE (lhs);
    2998                 :      950952 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    2999                 :             : 
    3000                 :             :   /* Punt for reductions where we don't handle the type conversions.  */
    3001                 :      950952 :   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    3002                 :             :     return NULL;
    3003                 :             : 
    3004                 :             :   /* Keep the first operand of a COND_EXPR as-is: only the other two
    3005                 :             :      operands are interesting.  */
    3006                 :      948986 :   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
    3007                 :             : 
    3008                 :             :   /* Check the operands.  */
    3009                 :      948986 :   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
    3010                 :      948986 :   auto_vec <vect_unpromoted_value, 3> unprom (nops);
    3011                 :      948986 :   unprom.quick_grow_cleared (nops);
    3012                 :      948986 :   unsigned int min_precision = 0;
    3013                 :      948986 :   bool single_use_p = false;
    3014                 :     2835317 :   for (unsigned int i = 0; i < nops; ++i)
    3015                 :             :     {
    3016                 :     1886795 :       tree op = gimple_op (last_stmt, first_op + i);
    3017                 :     1886795 :       if (TREE_CODE (op) == INTEGER_CST)
    3018                 :      843751 :         unprom[i].set_op (op, vect_constant_def);
    3019                 :     1043044 :       else if (TREE_CODE (op) == SSA_NAME)
    3020                 :             :         {
    3021                 :     1043044 :           bool op_single_use_p = true;
    3022                 :     1043044 :           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
    3023                 :             :                                                      &op_single_use_p))
    3024                 :         464 :             return NULL;
    3025                 :             :           /* If:
    3026                 :             : 
    3027                 :             :              (1) N bits of the result are needed;
    3028                 :             :              (2) all inputs are widened from M<N bits; and
    3029                 :             :              (3) one operand OP is a single-use SSA name
    3030                 :             : 
    3031                 :             :              we can shift the M->N widening from OP to the output
    3032                 :             :              without changing the number or type of extensions involved.
    3033                 :             :              This then reduces the number of copies of STMT_INFO.
    3034                 :             : 
    3035                 :             :              If instead of (3) more than one operand is a single-use SSA name,
    3036                 :             :              shifting the extension to the output is even more of a win.
    3037                 :             : 
    3038                 :             :              If instead:
    3039                 :             : 
    3040                 :             :              (1) N bits of the result are needed;
    3041                 :             :              (2) one operand OP2 is widened from M2<N bits;
    3042                 :             :              (3) another operand OP1 is widened from M1<M2 bits; and
    3043                 :             :              (4) both OP1 and OP2 are single-use
    3044                 :             : 
    3045                 :             :              the choice is between:
    3046                 :             : 
    3047                 :             :              (a) truncating OP2 to M1, doing the operation on M1,
    3048                 :             :                  and then widening the result to N
    3049                 :             : 
    3050                 :             :              (b) widening OP1 to M2, doing the operation on M2, and then
    3051                 :             :                  widening the result to N
    3052                 :             : 
    3053                 :             :              Both shift the M2->N widening of the inputs to the output.
    3054                 :             :              (a) additionally shifts the M1->M2 widening to the output;
    3055                 :             :              it requires fewer copies of STMT_INFO but requires an extra
    3056                 :             :              M2->M1 truncation.
    3057                 :             : 
    3058                 :             :              Which is better will depend on the complexity and cost of
    3059                 :             :              STMT_INFO, which is hard to predict at this stage.  However,
    3060                 :             :              a clear tie-breaker in favor of (b) is the fact that the
    3061                 :             :              truncation in (a) increases the length of the operation chain.
    3062                 :             : 
    3063                 :             :              If instead of (4) only one of OP1 or OP2 is single-use,
    3064                 :             :              (b) is still a win over doing the operation in N bits:
    3065                 :             :              it still shifts the M2->N widening on the single-use operand
    3066                 :             :              to the output and reduces the number of STMT_INFO copies.
    3067                 :             : 
    3068                 :             :              If neither operand is single-use then operating on fewer than
    3069                 :             :              N bits might lead to more extensions overall.  Whether it does
    3070                 :             :              or not depends on global information about the vectorization
    3071                 :             :              region, and whether that's a good trade-off would again
    3072                 :             :              depend on the complexity and cost of the statements involved,
    3073                 :             :              as well as things like register pressure that are not normally
    3074                 :             :              modelled at this stage.  We therefore ignore these cases
    3075                 :             :              and just optimize the clear single-use wins above.
    3076                 :             : 
    3077                 :             :              Thus we take the maximum precision of the unpromoted operands
    3078                 :             :              and record whether any operand is single-use.  */
    3079                 :     1042580 :           if (unprom[i].dt == vect_internal_def)
    3080                 :             :             {
    3081                 :      718887 :               min_precision = MAX (min_precision,
    3082                 :             :                                    TYPE_PRECISION (unprom[i].type));
    3083                 :      718887 :               single_use_p |= op_single_use_p;
    3084                 :             :             }
    3085                 :             :         }
    3086                 :             :       else
    3087                 :             :         return NULL;
    3088                 :             :     }
    3089                 :             : 
    3090                 :             :   /* Although the operation could be done in operation_precision, we have
    3091                 :             :      to balance that against introducing extra truncations or extensions.
    3092                 :             :      Calculate the minimum precision that can be handled efficiently.
    3093                 :             : 
    3094                 :             :      The loop above determined that the operation could be handled
    3095                 :             :      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
    3096                 :             :      extension from the inputs to the output without introducing more
    3097                 :             :      instructions, and would reduce the number of instructions required
    3098                 :             :      for STMT_INFO itself.
    3099                 :             : 
    3100                 :             :      vect_determine_precisions has also determined that the result only
    3101                 :             :      needs min_output_precision bits.  Truncating by a factor of N times
    3102                 :             :      requires a tree of N - 1 instructions, so if TYPE is N times wider
    3103                 :             :      than min_output_precision, doing the operation in TYPE and truncating
    3104                 :             :      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
    3105                 :             :      In contrast:
    3106                 :             : 
    3107                 :             :      - truncating the input to a unary operation and doing the operation
    3108                 :             :        in the new type requires at most N - 1 + 1 = N instructions per
    3109                 :             :        output vector
    3110                 :             : 
    3111                 :             :      - doing the same for a binary operation requires at most
    3112                 :             :        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
    3113                 :             : 
    3114                 :             :      Both unary and binary operations require fewer instructions than
    3115                 :             :      this if the operands were extended from a suitable truncated form.
    3116                 :             :      Thus there is usually nothing to lose by doing operations in
    3117                 :             :      min_output_precision bits, but there can be something to gain.  */
    3118                 :      948522 :   if (!single_use_p)
    3119                 :      695647 :     min_precision = last_stmt_info->min_output_precision;
    3120                 :             :   else
    3121                 :      252875 :     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
    3122                 :             : 
    3123                 :             :   /* Apply the minimum efficient precision we just calculated.  */
    3124                 :      948522 :   if (new_precision < min_precision)
    3125                 :      843302 :     new_precision = min_precision;
    3126                 :      948522 :   new_precision = vect_element_precision (new_precision);
    3127                 :      948522 :   if (new_precision >= TYPE_PRECISION (type))
    3128                 :             :     return NULL;
    3129                 :             : 
    3130                 :      120936 :   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
    3131                 :             : 
    3132                 :      120936 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3133                 :      120936 :   if (!*type_out)
    3134                 :             :     return NULL;
    3135                 :             : 
    3136                 :             :   /* We've found a viable pattern.  Get the new type of the operation.  */
    3137                 :      109221 :   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
    3138                 :      109221 :   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
    3139                 :             : 
    3140                 :             :   /* If we're truncating an operation, we need to make sure that we
    3141                 :             :      don't introduce new undefined overflow.  The codes tested here are
    3142                 :             :      a subset of those accepted by vect_truncatable_operation_p.  */
    3143                 :      109221 :   tree op_type = new_type;
    3144                 :      109221 :   if (TYPE_OVERFLOW_UNDEFINED (new_type)
    3145                 :      141760 :       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    3146                 :       24481 :     op_type = build_nonstandard_integer_type (new_precision, true);
    3147                 :             : 
    3148                 :             :   /* We specifically don't check here whether the target supports the
    3149                 :             :      new operation, since it might be something that a later pattern
    3150                 :             :      wants to rewrite anyway.  If targets have a minimum element size
    3151                 :             :      for some optabs, we should pattern-match smaller ops to larger ops
    3152                 :             :      where beneficial.  */
    3153                 :      109221 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3154                 :      109221 :   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
    3155                 :      109221 :   if (!new_vectype || !op_vectype)
    3156                 :             :     return NULL;
    3157                 :             : 
    3158                 :      109221 :   if (dump_enabled_p ())
    3159                 :        4086 :     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
    3160                 :             :                      type, new_type);
    3161                 :             : 
    3162                 :             :   /* Calculate the rhs operands for an operation on OP_TYPE.  */
    3163                 :      109221 :   tree ops[3] = {};
    3164                 :      109311 :   for (unsigned int i = 1; i < first_op; ++i)
    3165                 :          90 :     ops[i - 1] = gimple_op (last_stmt, i);
    3166                 :      109221 :   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
    3167                 :      109221 :                        op_type, &unprom[0], op_vectype);
    3168                 :             : 
    3169                 :             :   /* Use the operation to produce a result of type OP_TYPE.  */
    3170                 :      109221 :   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
    3171                 :      109221 :   gimple *pattern_stmt = gimple_build_assign (new_var, code,
    3172                 :             :                                               ops[0], ops[1], ops[2]);
    3173                 :      109221 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3174                 :             : 
    3175                 :      109221 :   if (dump_enabled_p ())
    3176                 :        4086 :     dump_printf_loc (MSG_NOTE, vect_location,
    3177                 :             :                      "created pattern stmt: %G", pattern_stmt);
    3178                 :             : 
    3179                 :             :   /* Convert back to the original signedness, if OP_TYPE is different
    3180                 :             :      from NEW_TYPE.  */
    3181                 :      109221 :   if (op_type != new_type)
    3182                 :       24481 :     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
    3183                 :             :                                         pattern_stmt, op_vectype);
    3184                 :             : 
    3185                 :             :   /* Promote the result to the original type.  */
    3186                 :      109221 :   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
    3187                 :             :                                       pattern_stmt, new_vectype);
    3188                 :             : 
    3189                 :      109221 :   return pattern_stmt;
    3190                 :      948986 : }
    3191                 :             : 
    3192                 :             : /* Recognize the following patterns:
    3193                 :             : 
    3194                 :             :      ATYPE a;  // narrower than TYPE
    3195                 :             :      BTYPE b;  // narrower than TYPE
    3196                 :             : 
    3197                 :             :    1) Multiply high with scaling
    3198                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> c;
    3199                 :             :      Here, c is bitsize (TYPE) / 2 - 1.
    3200                 :             : 
    3201                 :             :    2) ... or also with rounding
    3202                 :             :      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
    3203                 :             :      Here, d is bitsize (TYPE) / 2 - 2.
    3204                 :             : 
    3205                 :             :    3) Normal multiply high
    3206                 :             :      TYPE res = ((TYPE) a * (TYPE) b) >> e;
    3207                 :             :      Here, e is bitsize (TYPE) / 2.
    3208                 :             : 
    3209                 :             :    where only the bottom half of res is used.  */
    3210                 :             : 
    3211                 :             : static gimple *
    3212                 :    25017182 : vect_recog_mulhs_pattern (vec_info *vinfo,
    3213                 :             :                           stmt_vec_info last_stmt_info, tree *type_out)
    3214                 :             : {
    3215                 :             :   /* Check for a right shift.  */
    3216                 :    25017182 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3217                 :    17204628 :   if (!last_stmt
    3218                 :    17204628 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
    3219                 :             :     return NULL;
    3220                 :             : 
    3221                 :             :   /* Check that the shift result is wider than the users of the
    3222                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3223                 :      281105 :   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    3224                 :      281105 :   unsigned int target_precision
    3225                 :      281105 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3226                 :      281105 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3227                 :      281105 :       || target_precision >= TYPE_PRECISION (lhs_type))
    3228                 :             :     return NULL;
    3229                 :             : 
    3230                 :             :   /* Look through any change in sign on the outer shift input.  */
    3231                 :       39296 :   vect_unpromoted_value unprom_rshift_input;
    3232                 :       39296 :   tree rshift_input = vect_look_through_possible_promotion
    3233                 :       39296 :     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
    3234                 :       39296 :   if (!rshift_input
    3235                 :       39296 :       || TYPE_PRECISION (TREE_TYPE (rshift_input))
    3236                 :       38630 :            != TYPE_PRECISION (lhs_type))
    3237                 :             :     return NULL;
    3238                 :             : 
    3239                 :             :   /* Get the definition of the shift input.  */
    3240                 :       36526 :   stmt_vec_info rshift_input_stmt_info
    3241                 :       36526 :     = vect_get_internal_def (vinfo, rshift_input);
    3242                 :       36526 :   if (!rshift_input_stmt_info)
    3243                 :             :     return NULL;
    3244                 :       32261 :   gassign *rshift_input_stmt
    3245                 :    25045528 :     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
    3246                 :       28415 :   if (!rshift_input_stmt)
    3247                 :             :     return NULL;
    3248                 :             : 
    3249                 :       28415 :   stmt_vec_info mulh_stmt_info;
    3250                 :       28415 :   tree scale_term;
    3251                 :       28415 :   bool rounding_p = false;
    3252                 :             : 
    3253                 :             :   /* Check for the presence of the rounding term.  */
    3254                 :       34517 :   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
    3255                 :             :     {
    3256                 :             :       /* Check that the outer shift was by 1.  */
    3257                 :       16582 :       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
    3258                 :        8243 :         return NULL;
    3259                 :             : 
    3260                 :             :       /* Check that the second operand of the PLUS_EXPR is 1.  */
    3261                 :        1704 :       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
    3262                 :             :         return NULL;
    3263                 :             : 
    3264                 :             :       /* Look through any change in sign on the addition input.  */
    3265                 :          91 :       vect_unpromoted_value unprom_plus_input;
    3266                 :          91 :       tree plus_input = vect_look_through_possible_promotion
    3267                 :          91 :         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
    3268                 :          91 :       if (!plus_input
    3269                 :          91 :            || TYPE_PRECISION (TREE_TYPE (plus_input))
    3270                 :          91 :                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
    3271                 :             :         return NULL;
    3272                 :             : 
    3273                 :             :       /* Get the definition of the multiply-high-scale part.  */
    3274                 :          91 :       stmt_vec_info plus_input_stmt_info
    3275                 :          91 :         = vect_get_internal_def (vinfo, plus_input);
    3276                 :          91 :       if (!plus_input_stmt_info)
    3277                 :             :         return NULL;
    3278                 :          91 :       gassign *plus_input_stmt
    3279                 :        8334 :         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
    3280                 :          91 :       if (!plus_input_stmt
    3281                 :          91 :           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
    3282                 :             :         return NULL;
    3283                 :             : 
    3284                 :             :       /* Look through any change in sign on the scaling input.  */
    3285                 :          48 :       vect_unpromoted_value unprom_scale_input;
    3286                 :          48 :       tree scale_input = vect_look_through_possible_promotion
    3287                 :          48 :         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
    3288                 :          48 :       if (!scale_input
    3289                 :          48 :           || TYPE_PRECISION (TREE_TYPE (scale_input))
    3290                 :          48 :                != TYPE_PRECISION (TREE_TYPE (plus_input)))
    3291                 :             :         return NULL;
    3292                 :             : 
    3293                 :             :       /* Get the definition of the multiply-high part.  */
    3294                 :          48 :       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
    3295                 :          48 :       if (!mulh_stmt_info)
    3296                 :             :         return NULL;
    3297                 :             : 
    3298                 :             :       /* Get the scaling term.  */
    3299                 :          48 :       scale_term = gimple_assign_rhs2 (plus_input_stmt);
    3300                 :          48 :       rounding_p = true;
    3301                 :             :     }
    3302                 :             :   else
    3303                 :             :     {
    3304                 :       20124 :       mulh_stmt_info = rshift_input_stmt_info;
    3305                 :       20124 :       scale_term = gimple_assign_rhs2 (last_stmt);
    3306                 :             :     }
    3307                 :             : 
    3308                 :             :   /* Check that the scaling factor is constant.  */
    3309                 :       20172 :   if (TREE_CODE (scale_term) != INTEGER_CST)
    3310                 :             :     return NULL;
    3311                 :             : 
    3312                 :             :   /* Check whether the scaling input term can be seen as two widened
    3313                 :             :      inputs multiplied together.  */
    3314                 :       57702 :   vect_unpromoted_value unprom_mult[2];
    3315                 :       19234 :   tree new_type;
    3316                 :       19234 :   unsigned int nops
    3317                 :       19234 :     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
    3318                 :             :                             false, 2, unprom_mult, &new_type);
    3319                 :       19234 :   if (nops != 2)
    3320                 :             :     return NULL;
    3321                 :             : 
    3322                 :             :   /* Adjust output precision.  */
    3323                 :        2248 :   if (TYPE_PRECISION (new_type) < target_precision)
    3324                 :           0 :     new_type = build_nonstandard_integer_type
    3325                 :           0 :       (target_precision, TYPE_UNSIGNED (new_type));
    3326                 :             : 
    3327                 :        2248 :   unsigned mult_precision = TYPE_PRECISION (new_type);
    3328                 :        2248 :   internal_fn ifn;
    3329                 :             :   /* Check that the scaling factor is expected.  Instead of
    3330                 :             :      target_precision, we should use the one that we actually
    3331                 :             :      use for internal function.  */
    3332                 :        2248 :   if (rounding_p)
    3333                 :             :     {
    3334                 :             :       /* Check pattern 2).  */
    3335                 :          96 :       if (wi::to_widest (scale_term) + mult_precision + 2
    3336                 :         144 :           != TYPE_PRECISION (lhs_type))
    3337                 :             :         return NULL;
    3338                 :             : 
    3339                 :             :       ifn = IFN_MULHRS;
    3340                 :             :     }
    3341                 :             :   else
    3342                 :             :     {
    3343                 :             :       /* Check for pattern 1).  */
    3344                 :        4400 :       if (wi::to_widest (scale_term) + mult_precision + 1
    3345                 :        6600 :           == TYPE_PRECISION (lhs_type))
    3346                 :             :         ifn = IFN_MULHS;
    3347                 :             :       /* Check for pattern 3).  */
    3348                 :        2156 :       else if (wi::to_widest (scale_term) + mult_precision
    3349                 :        4312 :                == TYPE_PRECISION (lhs_type))
    3350                 :             :         ifn = IFN_MULH;
    3351                 :             :       else
    3352                 :             :         return NULL;
    3353                 :             :     }
    3354                 :             : 
    3355                 :        2176 :   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
    3356                 :             : 
    3357                 :             :   /* Check for target support.  */
    3358                 :        2176 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3359                 :        2176 :   if (!new_vectype
    3360                 :        4336 :       || !direct_internal_fn_supported_p
    3361                 :        2160 :             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3362                 :        2107 :     return NULL;
    3363                 :             : 
    3364                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3365                 :             :      it's likely to be discarded.  */
    3366                 :          69 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3367                 :          69 :   if (!*type_out)
    3368                 :             :     return NULL;
    3369                 :             : 
    3370                 :             :   /* Generate the IFN_MULHRS, IFN_MULHS or IFN_MULH call selected above.  */
    3371                 :          69 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3372                 :          69 :   tree new_ops[2];
    3373                 :          69 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3374                 :             :                        unprom_mult, new_vectype);
    3375                 :          69 :   gcall *mulhrs_stmt
    3376                 :          69 :     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
    3377                 :          69 :   gimple_call_set_lhs (mulhrs_stmt, new_var);
    3378                 :          69 :   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
    3379                 :             : 
    3380                 :          69 :   if (dump_enabled_p ())
    3381                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
    3382                 :             :                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
    3383                 :             : 
    3384                 :          69 :   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
    3385                 :          69 :                               mulhrs_stmt, new_vectype);
    3386                 :             : }
    3387                 :             : 
    3388                 :             : /* Recognize the patterns:
    3389                 :             : 
    3390                 :             :             ATYPE a;  // narrower than TYPE
    3391                 :             :             BTYPE b;  // narrower than TYPE
    3392                 :             :         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
    3393                 :             :      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
    3394                 :             : 
    3395                 :             :    where only the bottom half of avg is used.  Try to transform them into:
    3396                 :             : 
    3397                 :             :         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
    3398                 :             :      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
    3399                 :             : 
    3400                 :             :   followed by:
    3401                 :             : 
    3402                 :             :             TYPE avg = (TYPE) avg';
    3403                 :             : 
    3404                 :             :   where NTYPE is no wider than half of TYPE.  Since only the bottom half
    3405                 :             :   of avg is used, all or part of the cast of avg' should become redundant.
    3406                 :             : 
    3407                 :             :   If there is no target support available, generate code to distribute rshift
    3408                 :             :   over plus and add a carry (only attempted when NTYPE is unsigned).  */
    3409                 :             : 
    3410                 :             : static gimple *
    3411                 :    25015492 : vect_recog_average_pattern (vec_info *vinfo,
    3412                 :             :                             stmt_vec_info last_stmt_info, tree *type_out)
    3413                 :             : {
    3414                 :             :   /* Check for a shift right by one bit.  */
    3415                 :    25015492 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3416                 :    17203060 :   if (!last_stmt
    3417                 :    17203060 :       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
    3418                 :      280961 :       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    3419                 :    24975475 :     return NULL;
    3420                 :             : 
    3421                 :             :   /* Check that the shift result is wider than the users of the
    3422                 :             :      result need (i.e. that narrowing would be a natural choice).  */
    3423                 :       40017 :   tree lhs = gimple_assign_lhs (last_stmt);
    3424                 :       40017 :   tree type = TREE_TYPE (lhs);
    3425                 :       40017 :   unsigned int target_precision
    3426                 :       40017 :     = vect_element_precision (last_stmt_info->min_output_precision);
    3427                 :       40017 :   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    3428                 :             :     return NULL;
    3429                 :             : 
    3430                 :             :   /* Look through any change in sign on the shift input.  */
    3431                 :        2455 :   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
    3432                 :        2455 :   vect_unpromoted_value unprom_plus;
    3433                 :        2455 :   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
    3434                 :             :                                                      &unprom_plus);
    3435                 :        2455 :   if (!rshift_rhs
    3436                 :        2455 :       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
    3437                 :             :     return NULL;
    3438                 :             : 
    3439                 :             :   /* Get the definition of the shift input.  */
    3440                 :        2445 :   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
    3441                 :        2445 :   if (!plus_stmt_info)
    3442                 :             :     return NULL;
    3443                 :             : 
    3444                 :             :   /* Check whether the shift input can be seen as a tree of additions on
    3445                 :             :      2 or 3 widened inputs.
    3446                 :             : 
    3447                 :             :      Note that the pattern should be a win even if the result of one or
    3448                 :             :      more additions is reused elsewhere: if the pattern matches, we'd be
    3449                 :             :      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
    3450                 :        9708 :   internal_fn ifn = IFN_AVG_FLOOR;
    3451                 :        9708 :   vect_unpromoted_value unprom[3];
    3452                 :        2427 :   tree new_type;
    3453                 :        2427 :   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
    3454                 :             :                                             IFN_VEC_WIDEN_PLUS, false, 3,
    3455                 :             :                                             unprom, &new_type);
    3456                 :        2427 :   if (nops == 0)
    3457                 :             :     return NULL;
    3458                 :        1104 :   if (nops == 3)
    3459                 :             :     {
    3460                 :             :       /* Check that one operand is 1.  */
    3461                 :             :       unsigned int i;
    3462                 :         921 :       for (i = 0; i < 3; ++i)
    3463                 :         861 :         if (integer_onep (unprom[i].op))
    3464                 :             :           break;
    3465                 :         287 :       if (i == 3)
    3466                 :             :         return NULL;
    3467                 :             :       /* Throw away the 1 operand and keep the other two in UNPROM[0..1].  */
    3468                 :         227 :       if (i < 2)
    3469                 :           0 :         unprom[i] = unprom[2];
    3470                 :             :       ifn = IFN_AVG_CEIL;
    3471                 :             :     }
    3472                 :             : 
    3473                 :        1044 :   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
    3474                 :             : 
    3475                 :             :   /* We know that:
    3476                 :             : 
    3477                 :             :      (a) the operation can be viewed as:
    3478                 :             : 
    3479                 :             :            TYPE widened0 = (TYPE) UNPROM[0];
    3480                 :             :            TYPE widened1 = (TYPE) UNPROM[1];
    3481                 :             :            TYPE tmp1 = widened0 + widened1 {+ 1};
    3482                 :             :            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
    3483                 :             : 
    3484                 :             :      (b) the first two statements are equivalent to:
    3485                 :             : 
    3486                 :             :            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
    3487                 :             :            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
    3488                 :             : 
    3489                 :             :      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
    3490                 :             :          where sensible;
    3491                 :             : 
    3492                 :             :      (d) all the operations can be performed correctly at twice the width of
    3493                 :             :          NEW_TYPE, due to the nature of the average operation; and
    3494                 :             : 
    3495                 :             :      (e) users of the result of the right shift need only TARGET_PRECISION
    3496                 :             :          bits, where TARGET_PRECISION is no more than half of TYPE's
    3497                 :             :          precision.
    3498                 :             : 
    3499                 :             :      Under these circumstances, the only situation in which NEW_TYPE
    3500                 :             :      could be narrower than TARGET_PRECISION is if widened0, widened1
    3501                 :             :      and an addition result are all used more than once.  Thus we can
    3502                 :             :      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
    3503                 :             :      as "free", whereas widening the result of the average instruction
    3504                 :             :      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
    3505                 :             :      therefore better not to go narrower than TARGET_PRECISION.  */
    3506                 :        1044 :   if (TYPE_PRECISION (new_type) < target_precision)
    3507                 :           8 :     new_type = build_nonstandard_integer_type (target_precision,
    3508                 :           8 :                                                TYPE_UNSIGNED (new_type));
    3509                 :             : 
    3510                 :             :   /* Check for target support: IFN itself, else the open-coded fallback.  */
    3511                 :        1044 :   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
    3512                 :        1044 :   if (!new_vectype)
    3513                 :             :     return NULL;
    3514                 :             : 
    3515                 :        1044 :   bool fallback_p = false;
    3516                 :             : 
    3517                 :        1044 :   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    3518                 :             :     ;
    3519                 :         922 :   else if (TYPE_UNSIGNED (new_type)
    3520                 :         267 :            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
    3521                 :         267 :            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
    3522                 :         267 :            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
    3523                 :        1189 :            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    3524                 :             :     fallback_p = true;
    3525                 :             :   else
    3526                 :         655 :     return NULL;
    3527                 :             : 
    3528                 :             :   /* The IR requires a valid vector type for the cast result, even though
    3529                 :             :      it's likely to be discarded.  */
    3530                 :         389 :   *type_out = get_vectype_for_scalar_type (vinfo, type);
    3531                 :         389 :   if (!*type_out)
    3532                 :             :     return NULL;
    3533                 :             : 
    3534                 :         388 :   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
    3535                 :         388 :   tree new_ops[2];
    3536                 :         388 :   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
    3537                 :             :                        unprom, new_vectype);
    3538                 :             : 
    3539                 :         388 :   if (fallback_p)
    3540                 :             :     {
    3541                 :             :       /* As a fallback, generate code for following sequence:
    3542                 :             : 
    3543                 :             :          shifted_op0 = new_ops[0] >> 1;
    3544                 :             :          shifted_op1 = new_ops[1] >> 1;
    3545                 :             :          sum_of_shifted = shifted_op0 + shifted_op1;
    3546                 :             :          unmasked_carry = new_ops[0] | new_ops[1];  (& for IFN_AVG_FLOOR)
    3547                 :             :          carry = unmasked_carry & 1;
    3548                 :             :          new_var = sum_of_shifted + carry;
    3549                 :             :       */         
    3550                 :             : 
    3551                 :         266 :       tree one_cst = build_one_cst (new_type);
    3552                 :         266 :       gassign *g;
    3553                 :             : 
    3554                 :         266 :       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
    3555                 :         266 :       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
    3556                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3557                 :             : 
    3558                 :         266 :       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
    3559                 :         266 :       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
    3560                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3561                 :             : 
    3562                 :         266 :       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
    3563                 :         266 :       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
    3564                 :             :                                shifted_op0, shifted_op1);
    3565                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3566                 :             :       
    3567                 :         266 :       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
    3568                 :         266 :       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
    3569                 :         266 :       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
    3570                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3571                 :             :  
    3572                 :         266 :       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
    3573                 :         266 :       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
    3574                 :         266 :       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
    3575                 :             : 
    3576                 :         266 :       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
    3577                 :         266 :       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
    3578                 :             :     }
    3579                 :             : 
    3580                 :             :   /* Generate the IFN_AVG* call.  */
    3581                 :         122 :   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
    3582                 :             :                                                     new_ops[1]);
    3583                 :         122 :   gimple_call_set_lhs (average_stmt, new_var);
    3584                 :         122 :   gimple_set_location (average_stmt, gimple_location (last_stmt));
    3585                 :             : 
    3586                 :         122 :   if (dump_enabled_p ())
    3587                 :          42 :     dump_printf_loc (MSG_NOTE, vect_location,
    3588                 :             :                      "created pattern stmt: %G", (gimple *) average_stmt);
    3589                 :             : 
    3590                 :         122 :   return vect_convert_output (vinfo, last_stmt_info,
    3591                 :         122 :                               type, average_stmt, new_vectype);
    3592                 :             : }
    3593                 :             : 
    3594                 :             : /* Recognize cases in which the input to a cast is wider than its
    3595                 :             :    output, and the input is fed by a widening operation.  Fold this
    3596                 :             :    by removing the unnecessary intermediate widening.  E.g.:
    3597                 :             : 
    3598                 :             :      unsigned char a;
    3599                 :             :      unsigned int b = (unsigned int) a;
    3600                 :             :      unsigned short c = (unsigned short) b;
    3601                 :             : 
    3602                 :             :    -->
    3603                 :             : 
    3604                 :             :      unsigned short c = (unsigned short) a;
    3605                 :             : 
    3606                 :             :    Although this is rare in input IR, it is an expected side-effect
    3607                 :             :    of the over-widening pattern above.
    3608                 :             : 
    3609                 :             :    This is beneficial also for integer-to-float conversions, if the
    3610                 :             :    widened integer has more bits than the float, and if the unwidened
    3611                 :             :    input doesn't.  */
    3612                 :             : 
    3613                 :             : static gimple *
    3614                 :    25017182 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
    3615                 :             :                                   stmt_vec_info last_stmt_info, tree *type_out)
    3616                 :             : {
    3617                 :             :   /* Check for a cast, including an integer-to-float conversion.  */
    3618                 :    42183913 :   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
    3619                 :    17204559 :   if (!last_stmt)
    3620                 :             :     return NULL;
    3621                 :    17204559 :   tree_code code = gimple_assign_rhs_code (last_stmt);
    3622                 :    17204559 :   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    3623                 :             :     return NULL;
    3624                 :             : 
    3625                 :             :   /* Make sure that the lhs is a non-boolean scalar with a scalar mode.  */
    3626                 :     2423568 :   tree lhs = gimple_assign_lhs (last_stmt);
    3627                 :     2423568 :   if (!lhs)
    3628                 :             :     return NULL;
    3629                 :     2423568 :   tree lhs_type = TREE_TYPE (lhs);
    3630                 :     2423568 :   scalar_mode lhs_mode;
    3631                 :     2406823 :   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
    3632                 :     4827818 :       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    3633                 :       24590 :     return NULL;
    3634                 :             : 
    3635                 :             :   /* Check for a narrowing operation (from a vector point of view).  */
    3636                 :     2398978 :   tree rhs = gimple_assign_rhs1 (last_stmt);
    3637                 :     2398978 :   tree rhs_type = TREE_TYPE (rhs);
    3638                 :     2398978 :   if (!INTEGRAL_TYPE_P (rhs_type)
    3639                 :     2150800 :       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
    3640                 :     6541028 :       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    3641                 :             :     return NULL;
    3642                 :             : 
    3643                 :             :   /* Try to find an unpromoted input.  */
    3644                 :      295997 :   vect_unpromoted_value unprom;
    3645                 :      295997 :   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
    3646                 :      295997 :       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    3647                 :             :     return NULL;
    3648                 :             : 
    3649                 :             :   /* If the bits above RHS_TYPE matter (LHS_TYPE is not integral), make sure
    3650                 :             :      they're the same when extending from UNPROM as when extending from RHS.  */
    3651                 :       37948 :   if (!INTEGRAL_TYPE_P (lhs_type)
    3652                 :       37948 :       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    3653                 :             :     return NULL;
    3654                 :             : 
    3655                 :             :   /* We can get the same result by casting UNPROM directly, to avoid
    3656                 :             :      the unnecessary widening and narrowing.  */
    3657                 :       37828 :   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
    3658                 :             : 
    3659                 :       37828 :   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
    3660                 :       37828 :   if (!*type_out)
    3661                 :             :     return NULL;
    3662                 :             : 
    3663                 :       37828 :   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
    3664                 :       37828 :   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
    3665                 :       37828 :   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    3666                 :             : 
    3667                 :       37828 :   return pattern_stmt;
    3668                 :             : }
    3669                 :             : 
    3670                 :             : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
    3671                 :             :    to WIDEN_LSHIFT_EXPR via vect_recog_widen_op_pattern, which see for details.  */
    3672                 :             : 
    3673                 :             : static gimple *
    3674                 :    24962040 : vect_recog_widen_shift_pattern (vec_info *vinfo,
    3675                 :             :                                 stmt_vec_info last_stmt_info, tree *type_out)
    3676                 :             : {
    3677                 :    24962040 :   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
    3678                 :             :                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
    3679                 :    24962040 :                                       "vect_recog_widen_shift_pattern");
    3680                 :             : }
    3681                 :             : 
    3682                 :             : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
    3683                 :             : 
    3684                 :             :    type a_t, b_t, c_t;
    3685                 :             : 
    3686                 :             :    S0 a_t = b_t r<< c_t;
    3687                 :             : 
    3688                 :             :   Input/Output:
    3689                 :             : 
    3690                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    3691                 :             :     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    3692                 :             :     with a sequence:
    3693                 :             : 
    3694                 :             :    S1 d_t = -c_t;
    3695                 :             :    S2 e_t = d_t & (B - 1);
    3696                 :             :    S3 f_t = b_t << c_t;
    3697                 :             :    S4 g_t = b_t >> e_t;
    3698                 :             :    S0 a_t = f_t | g_t;
    3699                 :             : 
    3700                 :             :     where B is element bitsize of type.
    3701                 :             : 
    3702                 :             :   Output:
    3703                 :             : 
    3704                 :             :   * TYPE_OUT: The type of the output of this pattern.
    3705                 :             : 
    3706                 :             :   * Return value: A new stmt that will be used to replace the rotate
    3707                 :             :     S0 stmt.  */
    3708                 :             : 
    3709                 :             : static gimple *
    3710                 :    24962040 : vect_recog_rotate_pattern (vec_info *vinfo,
    3711                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    3712                 :             : {
    3713                 :    24962040 :   gimple *last_stmt = stmt_vinfo->stmt;
    3714                 :    24962040 :   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
    3715                 :    24962040 :   gimple *pattern_stmt, *def_stmt;
    3716                 :    24962040 :   enum tree_code rhs_code;
    3717                 :    24962040 :   enum vect_def_type dt;
    3718                 :    24962040 :   optab optab1, optab2;
    3719                 :    24962040 :   edge ext_def = NULL;
    3720                 :    24962040 :   bool bswap16_p = false;
    3721                 :             : 
    3722                 :    24962040 :   if (is_gimple_assign (last_stmt))
    3723                 :             :     {
    3724                 :    17149388 :       rhs_code = gimple_assign_rhs_code (last_stmt);
    3725                 :    17149388 :       switch (rhs_code)
    3726                 :             :         {
    3727                 :        3963 :         case LROTATE_EXPR:
    3728                 :        3963 :         case RROTATE_EXPR:
    3729                 :        3963 :           break;
    3730                 :             :         default:
    3731                 :             :           return NULL;
    3732                 :             :         }
    3733                 :             : 
    3734                 :        3963 :       lhs = gimple_assign_lhs (last_stmt);
    3735                 :        3963 :       oprnd0 = gimple_assign_rhs1 (last_stmt);
    3736                 :        3963 :       type = TREE_TYPE (oprnd0);
    3737                 :        3963 :       oprnd1 = gimple_assign_rhs2 (last_stmt);
    3738                 :             :     }
    3739                 :     7812652 :   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
    3740                 :             :     {
    3741                 :             :       /* __builtin_bswap16 (x) is another form of x r>> 8.
    3742                 :             :          The vectorizer has bswap support, but only if the argument isn't
    3743                 :             :          promoted.  */
    3744                 :         136 :       lhs = gimple_call_lhs (last_stmt);
    3745                 :         136 :       oprnd0 = gimple_call_arg (last_stmt, 0);
    3746                 :         136 :       type = TREE_TYPE (oprnd0);
    3747                 :         136 :       if (!lhs
    3748                 :         136 :           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
    3749                 :         136 :           || TYPE_PRECISION (type) <= 16
    3750                 :         124 :           || TREE_CODE (oprnd0) != SSA_NAME
    3751                 :         260 :           || BITS_PER_UNIT != 8)
    3752                 :          19 :         return NULL;
    3753                 :             : 
    3754                 :         124 :       stmt_vec_info def_stmt_info;
    3755                 :         124 :       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
    3756                 :             :         return NULL;
    3757                 :             : 
    3758                 :         124 :       if (dt != vect_internal_def)
    3759                 :             :         return NULL;
    3760                 :             : 
    3761                 :         120 :       if (gimple_assign_cast_p (def_stmt))
    3762                 :             :         {
    3763                 :          70 :           def = gimple_assign_rhs1 (def_stmt);
    3764                 :         140 :           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
    3765                 :         140 :               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
    3766                 :             :             oprnd0 = def;
    3767                 :             :         }
    3768                 :             : 
    3769                 :         120 :       type = TREE_TYPE (lhs);
    3770                 :         120 :       vectype = get_vectype_for_scalar_type (vinfo, type);
    3771                 :         120 :       if (vectype == NULL_TREE)
    3772                 :             :         return NULL;
    3773                 :             : 
    3774                 :         120 :       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
    3775                 :             :         {
    3776                 :             :           /* The encoding uses one stepped pattern for each byte in the
    3777                 :             :              16-bit word.  */
    3778                 :         120 :           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
    3779                 :         480 :           for (unsigned i = 0; i < 3; ++i)
    3780                 :        1080 :             for (unsigned j = 0; j < 2; ++j)
    3781                 :         720 :               elts.quick_push ((i + 1) * 2 - j - 1);
    3782                 :             : 
    3783                 :         120 :           vec_perm_indices indices (elts, 1,
    3784                 :         120 :                                     TYPE_VECTOR_SUBPARTS (char_vectype));
    3785                 :         120 :           machine_mode vmode = TYPE_MODE (char_vectype);
    3786                 :         120 :           if (can_vec_perm_const_p (vmode, vmode, indices))
    3787                 :             :             {
    3788                 :             :               /* vectorizable_bswap can handle the __builtin_bswap16 if we
    3789                 :             :                  undo the argument promotion.  */
    3790                 :           3 :               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3791                 :             :                 {
    3792                 :           0 :                   def = vect_recog_temp_ssa_var (type, NULL);
    3793                 :           0 :                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3794                 :           0 :                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3795                 :           0 :                   oprnd0 = def;
    3796                 :             :                 }
    3797                 :             : 
    3798                 :             :               /* Pattern detected.  */
    3799                 :           3 :               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3800                 :             : 
    3801                 :           3 :               *type_out = vectype;
    3802                 :             : 
    3803                 :             :               /* Pattern supported.  Create a stmt to be used to replace the
    3804                 :             :                  pattern, with the unpromoted argument.  */
    3805                 :           3 :               var = vect_recog_temp_ssa_var (type, NULL);
    3806                 :           3 :               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
    3807                 :             :                                                 1, oprnd0);
    3808                 :           3 :               gimple_call_set_lhs (pattern_stmt, var);
    3809                 :           3 :               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
    3810                 :             :                                       gimple_call_fntype (last_stmt));
    3811                 :           3 :               return pattern_stmt;
    3812                 :             :             }
    3813                 :         120 :         }
    3814                 :             : 
    3815                 :         117 :       oprnd1 = build_int_cst (integer_type_node, 8);
    3816                 :         117 :       rhs_code = LROTATE_EXPR;
    3817                 :         117 :       bswap16_p = true;
    3818                 :             :     }
    3819                 :             :   else
    3820                 :             :     return NULL;
    3821                 :             : 
    3822                 :        4080 :   if (TREE_CODE (oprnd0) != SSA_NAME
    3823                 :        3958 :       || !INTEGRAL_TYPE_P (type)
    3824                 :        8031 :       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
    3825                 :             :     return NULL;
    3826                 :             : 
    3827                 :        3951 :   stmt_vec_info def_stmt_info;
    3828                 :        3951 :   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
    3829                 :             :     return NULL;
    3830                 :             : 
    3831                 :        3951 :   if (dt != vect_internal_def
    3832                 :        3739 :       && dt != vect_constant_def
    3833                 :          21 :       && dt != vect_external_def)
    3834                 :             :     return NULL;
    3835                 :             : 
    3836                 :        3945 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    3837                 :        3945 :   if (vectype == NULL_TREE)
    3838                 :             :     return NULL;
    3839                 :             : 
    3840                 :             :   /* If vector/vector or vector/scalar rotate is supported by the target,
    3841                 :             :      don't do anything here.  */
    3842                 :        3802 :   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
    3843                 :        3802 :   if (optab1
    3844                 :        3802 :       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    3845                 :             :     {
    3846                 :          34 :      use_rotate:
    3847                 :          34 :       if (bswap16_p)
    3848                 :             :         {
    3849                 :           0 :           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
    3850                 :             :             {
    3851                 :           0 :               def = vect_recog_temp_ssa_var (type, NULL);
    3852                 :           0 :               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3853                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    3854                 :           0 :               oprnd0 = def;
    3855                 :             :             }
    3856                 :             : 
    3857                 :             :           /* Pattern detected.  */
    3858                 :           0 :           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3859                 :             : 
    3860                 :           0 :           *type_out = vectype;
    3861                 :             : 
    3862                 :             :           /* Pattern supported.  Create a stmt to be used to replace the
    3863                 :             :              pattern.  */
    3864                 :           0 :           var = vect_recog_temp_ssa_var (type, NULL);
    3865                 :           0 :           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
    3866                 :             :                                               oprnd1);
    3867                 :           0 :           return pattern_stmt;
    3868                 :             :         }
    3869                 :             :       return NULL;
    3870                 :             :     }
    3871                 :             : 
    3872                 :        3768 :   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    3873                 :             :     {
    3874                 :        3707 :       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
    3875                 :        3707 :       if (optab2
    3876                 :        3707 :           && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    3877                 :           0 :         goto use_rotate;
    3878                 :             :     }
    3879                 :             : 
    3880                 :        3768 :   tree utype = unsigned_type_for (type);
    3881                 :        3768 :   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
    3882                 :        3768 :   if (!uvectype)
    3883                 :             :     return NULL;
    3884                 :             : 
    3885                 :             :   /* If vector/vector or vector/scalar shifts aren't supported by the target,
    3886                 :             :      don't do anything here either.  */
    3887                 :        3768 :   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
    3888                 :        3768 :   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
    3889                 :        3768 :   if (!optab1
    3890                 :        3768 :       || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
    3891                 :         331 :       || !optab2
    3892                 :        4099 :       || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
    3893                 :             :     {
    3894                 :        3437 :       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
    3895                 :             :         return NULL;
    3896                 :        3394 :       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
    3897                 :        3394 :       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
    3898                 :        3394 :       if (!optab1
    3899                 :        3394 :           || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
    3900                 :        2869 :           || !optab2
    3901                 :        6263 :           || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
    3902                 :         525 :         return NULL;
    3903                 :             :     }
    3904                 :             : 
    3905                 :        3200 :   *type_out = vectype;
    3906                 :             : 
    3907                 :        3200 :   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
    3908                 :             :     {
    3909                 :         101 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3910                 :         101 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
    3911                 :         101 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3912                 :         101 :       oprnd0 = def;
    3913                 :             :     }
    3914                 :             : 
    3915                 :        3200 :   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
    3916                 :          13 :     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
    3917                 :             : 
    3918                 :        3200 :   def = NULL_TREE;
    3919                 :        3200 :   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
    3920                 :        3200 :   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    3921                 :             :     def = oprnd1;
    3922                 :          31 :   else if (def_stmt && gimple_assign_cast_p (def_stmt))
    3923                 :             :     {
    3924                 :           0 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    3925                 :           0 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
    3926                 :           0 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    3927                 :           0 :              == TYPE_PRECISION (type))
    3928                 :             :         def = rhs1;
    3929                 :             :     }
    3930                 :             : 
    3931                 :        3169 :   if (def == NULL_TREE)
    3932                 :             :     {
    3933                 :          31 :       def = vect_recog_temp_ssa_var (utype, NULL);
    3934                 :          31 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    3935                 :          31 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3936                 :             :     }
    3937                 :        3200 :   stype = TREE_TYPE (def);
    3938                 :             : 
    3939                 :        3200 :   if (TREE_CODE (def) == INTEGER_CST)
    3940                 :             :     {
    3941                 :        3086 :       if (!tree_fits_uhwi_p (def)
    3942                 :        3086 :           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
    3943                 :        6172 :           || integer_zerop (def))
    3944                 :           0 :         return NULL;
    3945                 :        3086 :       def2 = build_int_cst (stype,
    3946                 :        3086 :                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    3947                 :             :     }
    3948                 :             :   else
    3949                 :             :     {
    3950                 :         114 :       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
    3951                 :             : 
    3952                 :         114 :       if (vecstype == NULL_TREE)
    3953                 :             :         return NULL;
    3954                 :         114 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3955                 :         114 :       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
    3956                 :         114 :       if (ext_def)
    3957                 :             :         {
    3958                 :          13 :           basic_block new_bb
    3959                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3960                 :          13 :           gcc_assert (!new_bb);
    3961                 :             :         }
    3962                 :             :       else
    3963                 :         101 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3964                 :             : 
    3965                 :         114 :       def2 = vect_recog_temp_ssa_var (stype, NULL);
    3966                 :         114 :       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
    3967                 :         114 :       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
    3968                 :             :                                       gimple_assign_lhs (def_stmt), mask);
    3969                 :         114 :       if (ext_def)
    3970                 :             :         {
    3971                 :          13 :           basic_block new_bb
    3972                 :          13 :             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
    3973                 :          13 :           gcc_assert (!new_bb);
    3974                 :             :         }
    3975                 :             :       else
    3976                 :         101 :         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    3977                 :             :     }
    3978                 :             : 
    3979                 :        3200 :   var1 = vect_recog_temp_ssa_var (utype, NULL);
    3980                 :        6196 :   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
    3981                 :             :                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
    3982                 :             :                                   oprnd0, def);
    3983                 :        3200 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3984                 :             : 
    3985                 :        3200 :   var2 = vect_recog_temp_ssa_var (utype, NULL);
    3986                 :        6196 :   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
    3987                 :             :                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
    3988                 :             :                                   oprnd0, def2);
    3989                 :        3200 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
    3990                 :             : 
    3991                 :             :   /* Pattern detected.  */
    3992                 :        3200 :   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
    3993                 :             : 
    3994                 :             :   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
    3995                 :        3200 :   var = vect_recog_temp_ssa_var (utype, NULL);
    3996                 :        3200 :   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
    3997                 :             : 
    3998                 :        3200 :   if (!useless_type_conversion_p (type, utype))
    3999                 :             :     {
    4000                 :          49 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
    4001                 :          49 :       tree result = vect_recog_temp_ssa_var (type, NULL);
    4002                 :          49 :       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
    4003                 :             :     }
    4004                 :             :   return pattern_stmt;
    4005                 :             : }
    4006                 :             : 
    4007                 :             : /* Detect a vector by vector shift pattern that wouldn't be otherwise
    4008                 :             :    vectorized:
    4009                 :             : 
    4010                 :             :    type a_t;
    4011                 :             :    TYPE b_T, res_T;
    4012                 :             : 
    4013                 :             :    S1 a_t = ;
    4014                 :             :    S2 b_T = ;
    4015                 :             :    S3 res_T = b_T op a_t;
    4016                 :             : 
    4017                 :             :   where type 'TYPE' is a type with different size than 'type',
    4018                 :             :   and op is <<, >> or rotate.
    4019                 :             : 
    4020                 :             :   Also detect cases:
    4021                 :             : 
    4022                 :             :    type a_t;
    4023                 :             :    TYPE b_T, c_T, res_T;
    4024                 :             : 
    4025                 :             :    S0 c_T = ;
    4026                 :             :    S1 a_t = (type) c_T;
    4027                 :             :    S2 b_T = ;
    4028                 :             :    S3 res_T = b_T op a_t;
    4029                 :             : 
    4030                 :             :   Input/Output:
    4031                 :             : 
    4032                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4033                 :             :     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    4034                 :             :     with a shift/rotate which has the same type TYPE on both operands:
    4035                 :             :     in the second case b_T op c_T (with c_T first masked to the precision of 'type' when 'type' is narrower than TYPE),
    4036                 :             :     in the first case b_T op (TYPE) a_t; the added mask or cast stmt goes in STMT_VINFO_PATTERN_DEF_SEQ.
    4037                 :             : 
    4038                 :             :   Output:
    4039                 :             : 
    4040                 :             :   * TYPE_OUT: The vector type of the output of this pattern (the vector type for TYPE).
    4041                 :             : 
    4042                 :             :   * Return value: A new stmt that will be used to replace the shift/rotate
    4043                 :             :     S3 stmt, or NULL if the pattern is not recognized.  */
    4044                 :             : 
    4045                 :             : static gimple *
    4046                 :    24965620 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
    4047                 :             :                                         stmt_vec_info stmt_vinfo,
    4048                 :             :                                         tree *type_out)
    4049                 :             : {
    4050                 :    24965620 :   gimple *last_stmt = stmt_vinfo->stmt;
    4051                 :    24965620 :   tree oprnd0, oprnd1, lhs, var;
    4052                 :    24965620 :   gimple *pattern_stmt;
    4053                 :    24965620 :   enum tree_code rhs_code;
    4054                 :             :   /* Only assignments can be shift/rotate stmts.  */
    4055                 :    24965620 :   if (!is_gimple_assign (last_stmt))
    4056                 :             :     return NULL;
    4057                 :             :   /* Of those, only shifts and rotates are candidates.  */
    4058                 :    17153088 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4059                 :    17153088 :   switch (rhs_code)
    4060                 :             :     {
    4061                 :      428758 :     case LSHIFT_EXPR:
    4062                 :      428758 :     case RSHIFT_EXPR:
    4063                 :      428758 :     case LROTATE_EXPR:
    4064                 :      428758 :     case RROTATE_EXPR:
    4065                 :      428758 :       break;
    4066                 :             :     default:
    4067                 :             :       return NULL;
    4068                 :             :     }
    4069                 :             :   /* Both operands must be SSA names whose types differ in mode; OPRND0 must be integral with the precision of LHS.  */
    4070                 :      428758 :   lhs = gimple_assign_lhs (last_stmt);
    4071                 :      428758 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4072                 :      428758 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4073                 :      428758 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4074                 :      388306 :       || TREE_CODE (oprnd1) != SSA_NAME
    4075                 :       48241 :       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
    4076                 :       17845 :       || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
    4077                 :       17579 :       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
    4078                 :      446337 :       || TYPE_PRECISION (TREE_TYPE (lhs))
    4079                 :       17579 :          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4080                 :      411179 :     return NULL;
    4081                 :             :   /* Give up unless vect_get_internal_def finds a definition for the shift amount.  */
    4082                 :       17579 :   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
    4083                 :       17579 :   if (!def_vinfo)
    4084                 :             :     return NULL;
    4085                 :             :   /* The result uses the vector type of OPRND0's type; note *TYPE_OUT is written even when we fail below.  */
    4086                 :       15704 :   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
    4087                 :       15704 :   if (*type_out == NULL_TREE)
    4088                 :             :     return NULL;
    4089                 :             :   /* Second case: the shift amount is a cast of a value with OPRND0's mode and precision; use that value, masked if the cast narrowed it.  */
    4090                 :       10651 :   tree def = NULL_TREE;
    4091                 :       10651 :   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
    4092                 :        8969 :   if (def_stmt && gimple_assign_cast_p (def_stmt))
    4093                 :             :     {
    4094                 :        1781 :       tree rhs1 = gimple_assign_rhs1 (def_stmt);
    4095                 :        1781 :       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
    4096                 :        1781 :           && TYPE_PRECISION (TREE_TYPE (rhs1))
    4097                 :         782 :              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
    4098                 :             :         {
    4099                 :         782 :           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
    4100                 :         782 :               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
    4101                 :             :             def = rhs1;
    4102                 :             :           else
    4103                 :             :             {
    4104                 :         779 :               tree mask
    4105                 :         779 :                 = build_low_bits_mask (TREE_TYPE (rhs1),
    4106                 :         779 :                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
    4107                 :         779 :               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    4108                 :         779 :               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
    4109                 :         779 :               tree vecstype = get_vectype_for_scalar_type (vinfo,
    4110                 :         779 :                                                            TREE_TYPE (rhs1));
    4111                 :         779 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
    4112                 :             :             }
    4113                 :             :         }
    4114                 :             :     }
    4115                 :             :   /* First case, or no usable cast was found: convert the shift amount to the type of OPRND0.  */
    4116                 :         782 :   if (def == NULL_TREE)
    4117                 :             :     {
    4118                 :        9869 :       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4119                 :        9869 :       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
    4120                 :        9869 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4121                 :             :     }
    4122                 :             : 
    4123                 :             :   /* Pattern detected.  */
    4124                 :       10651 :   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
    4125                 :             : 
    4126                 :             :   /* Pattern supported.  Create the replacement stmt: the same shift/rotate code applied to OPRND0 and the same-typed amount DEF.  */
    4127                 :       10651 :   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
    4128                 :       10651 :   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
    4129                 :             : 
    4130                 :       10651 :   return pattern_stmt;
    4131                 :             : }
    4132                 :             : 
    4133                 :             : /* Return true iff the target has a vector optab implementing the operation
    4134                 :             :    CODE on type VECTYPE, i.e. optab_for_tree_code returns an optab for (CODE, VECTYPE, optab_vector) and that optab has a handler for TYPE_MODE (VECTYPE).  */
    4135                 :             : 
    4136                 :             : static bool
    4137                 :      395383 : target_has_vecop_for_code (tree_code code, tree vectype)
    4138                 :             : {
    4139                 :      395383 :   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
    4140                 :      395383 :   return voptab
    4141                 :      395383 :          && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
    4142                 :             : }
    4143                 :             : 
    4144                 :             : /* Verify that the target has optabs of VECTYPE to perform all the steps
    4145                 :             :    needed by the multiplication-by-immediate synthesis algorithm described by
    4146                 :             :    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
    4147                 :             :    present.  Return true iff the target supports all the steps.  */
    4148                 :             : 
    4149                 :             : static bool
    4150                 :      197327 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
    4151                 :             :                                  tree vectype, bool synth_shift_p)
    4152                 :             : {
    4153                 :      197327 :   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    4154                 :             :     return false;
    4155                 :             : 
    4156                 :      197327 :   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
    4157                 :      197327 :   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
    4158                 :             : 
    4159                 :      197327 :   if (var == negate_variant
    4160                 :      197327 :       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    4161                 :             :     return false;
    4162                 :             : 
    4163                 :             :   /* The add variant's final fixup and shifts synthesized with additions
    4164                 :             :      both require vector addition.  */
    4165                 :      196915 :   if ((var == add_variant || synth_shift_p) && !supports_vplus)
    4166                 :             :     return false;
    4167                 :             : 
    4168                 :      115046 :   for (int i = 1; i < alg->ops; i++)
    4169                 :             :     {
    4170                 :       91740 :       switch (alg->op[i])
    4171                 :             :         {
    4172                 :             :         case alg_shift:
    4173                 :             :           break;
    4174                 :       24495 :         case alg_add_t_m2:
    4175                 :       24495 :         case alg_add_t2_m:
    4176                 :       24495 :         case alg_add_factor:
    4177                 :       24495 :           if (!supports_vplus)
    4178                 :             :             return false;
    4179                 :             :           break;
    4180                 :       16394 :         case alg_sub_t_m2:
    4181                 :       16394 :         case alg_sub_t2_m:
    4182                 :       16394 :         case alg_sub_factor:
    4183                 :       16394 :           if (!supports_vminus)
    4184                 :             :             return false;
    4185                 :             :           break;
    4186                 :             :         case alg_unknown:
    4187                 :             :         case alg_m:
    4188                 :             :         case alg_zero:
    4189                 :             :         case alg_impossible:
    4190                 :             :           return false;
    4191                 :           0 :         default:
    4192                 :           0 :           gcc_unreachable ();
    4193                 :             :         }
    4194                 :             :     }
    4195                 :             : 
    4196                 :             :   return true;
    4197                 :             : }
    4198                 :             : 
    4199                 :             : /* Synthesize a left shift of OP by AMNT bits using a series of additions,
    4200                 :             :    putting the final result in DEST.  Append all statements but the last to
    4201                 :             :    the pattern def sequence of STMT_INFO.  Return the last statement.  */
    4202                 :             : 
    4203                 :             : static gimple *
    4204                 :           0 : synth_lshift_by_additions (vec_info *vinfo,
    4205                 :             :                            tree dest, tree op, HOST_WIDE_INT amnt,
    4206                 :             :                            stmt_vec_info stmt_info)
    4207                 :             : {
    4208                 :           0 :   HOST_WIDE_INT i;
    4209                 :           0 :   tree itype = TREE_TYPE (op);
    4210                 :           0 :   tree prev_res = op;
    4211                 :           0 :   gcc_assert (amnt >= 0);
    4212                 :           0 :   for (i = 0; i < amnt; i++)
    4213                 :             :     {
    4214                 :           0 :       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
    4215                 :           0 :                       : dest;
    4216                 :           0 :       gimple *stmt
    4217                 :           0 :         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
    4218                 :           0 :       prev_res = tmp_var;
    4219                 :           0 :       if (i < amnt - 1)
    4220                 :           0 :         append_pattern_def_seq (vinfo, stmt_info, stmt);
    4221                 :             :       else
    4222                 :           0 :         return stmt;
    4223                 :             :     }
    4224                 :           0 :   gcc_unreachable ();
    4225                 :             :   return NULL;
    4226                 :             : }
    4227                 :             : 
    4228                 :             : /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
    4229                 :             :    CODE to operands OP1 and OP2, creating a new temporary SSA var in
    4230                 :             :    the process if necessary.  Append the resulting assignment statements
    4231                 :             :    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
    4232                 :             :    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
    4233                 :             :    left shifts using additions.  */
    4234                 :             : 
    4235                 :             : static tree
    4236                 :       40789 : apply_binop_and_append_stmt (vec_info *vinfo,
    4237                 :             :                              tree_code code, tree op1, tree op2,
    4238                 :             :                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
    4239                 :             : {
    4240                 :       40789 :   if (integer_zerop (op2)
    4241                 :       40789 :       && (code == LSHIFT_EXPR
    4242                 :       35511 :           || code == PLUS_EXPR))
    4243                 :             :     {
    4244                 :       35511 :       gcc_assert (TREE_CODE (op1) == SSA_NAME);
    4245                 :             :       return op1;
    4246                 :             :     }
    4247                 :             : 
    4248                 :        5278 :   gimple *stmt;
    4249                 :        5278 :   tree itype = TREE_TYPE (op1);
    4250                 :        5278 :   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
    4251                 :             : 
    4252                 :        5278 :   if (code == LSHIFT_EXPR
    4253                 :        5278 :       && synth_shift_p)
    4254                 :             :     {
    4255                 :           0 :       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
    4256                 :           0 :                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
    4257                 :           0 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4258                 :           0 :       return tmp_var;
    4259                 :             :     }
    4260                 :             : 
    4261                 :        5278 :   stmt = gimple_build_assign (tmp_var, code, op1, op2);
    4262                 :        5278 :   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4263                 :        5278 :   return tmp_var;
    4264                 :             : }
    4265                 :             : 
    4266                 :             : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
    4267                 :             :    and simple arithmetic operations to be vectorized.  Record the statements
    4268                 :             :    produced in STMT_VINFO and return the last statement in the sequence or
    4269                 :             :    NULL if it's not possible to synthesize such a multiplication.
    4270                 :             :    This function mirrors the behavior of expand_mult_const in expmed.cc but
    4271                 :             :    works on tree-ssa form.  */
    4272                 :             : 
    4273                 :             : static gimple *
    4274                 :      199919 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
    4275                 :             :                              stmt_vec_info stmt_vinfo)
    4276                 :             : {
    4277                 :      199919 :   tree itype = TREE_TYPE (op);
    4278                 :      199919 :   machine_mode mode = TYPE_MODE (itype);
    4279                 :      199919 :   struct algorithm alg;
    4280                 :      199919 :   mult_variant variant;
    4281                 :      199919 :   if (!tree_fits_shwi_p (val))
    4282                 :             :     return NULL;
    4283                 :             : 
    4284                 :             :   /* Multiplication synthesis by shifts, adds and subs can introduce
    4285                 :             :      signed overflow where the original operation didn't.  Perform the
    4286                 :             :      operations on an unsigned type and cast back to avoid this.
    4287                 :             :      In the future we may want to relax this for synthesis algorithms
    4288                 :             :      that we can prove do not cause unexpected overflow.  */
    4289                 :      197327 :   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
    4290                 :             : 
    4291                 :       41641 :   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
    4292                 :      197327 :   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
    4293                 :      197327 :   if (!vectype)
    4294                 :             :     return NULL;
    4295                 :             : 
    4296                 :             :   /* Targets that don't support vector shifts but support vector additions
    4297                 :             :      can synthesize shifts that way.  */
    4298                 :      197327 :   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
    4299                 :             : 
    4300                 :      197327 :   HOST_WIDE_INT hwval = tree_to_shwi (val);
    4301                 :             :   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
    4302                 :             :      The vectorizer's benefit analysis will decide whether it's beneficial
    4303                 :             :      to do this.  */
    4304                 :      394651 :   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
    4305                 :      197324 :                                        ? TYPE_MODE (vectype) : mode,
    4306                 :             :                                        hwval, &alg, &variant, MAX_COST);
    4307                 :      197327 :   if (!possible)
    4308                 :             :     return NULL;
    4309                 :             : 
    4310                 :      197327 :   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    4311                 :             :     return NULL;
    4312                 :             : 
    4313                 :       23306 :   tree accumulator;
    4314                 :             : 
    4315                 :             :   /* STMT tracks the last statement built; its final value is returned.  */
    4316                 :       23306 :   gimple *stmt = NULL;
    4317                 :             : 
    4318                 :       23306 :   if (cast_to_unsigned_p)
    4319                 :             :     {
    4320                 :       10722 :       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
    4321                 :       10722 :       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
    4322                 :       10722 :       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4323                 :       10722 :       op = tmp_op;
    4324                 :             :     }
    4325                 :             : 
    4326                 :       23306 :   if (alg.op[0] == alg_zero)
    4327                 :         168 :     accumulator = build_int_cst (multtype, 0);
    4328                 :             :   else
    4329                 :             :     accumulator = op;
    4330                 :             : 
    4331                 :       23306 :   bool needs_fixup = (variant == negate_variant)
    4332                 :       23306 :                       || (variant == add_variant);
    4333                 :             : 
    4334                 :      114893 :   for (int i = 1; i < alg.ops; i++)
    4335                 :             :     {
    4336                 :       91587 :       tree shft_log = build_int_cst (multtype, alg.log[i]);
    4337                 :       91587 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4338                 :       91587 :       tree tmp_var = NULL_TREE;
    4339                 :             : 
    4340                 :       91587 :       switch (alg.op[i])
    4341                 :             :         {
    4342                 :       50798 :         case alg_shift:
    4343                 :       50798 :           if (synth_shift_p)
    4344                 :           0 :             stmt
    4345                 :           0 :               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
    4346                 :           0 :                                            alg.log[i], stmt_vinfo);
    4347                 :             :           else
    4348                 :       50798 :             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
    4349                 :             :                                          shft_log);
    4350                 :             :           break;
    4351                 :       19939 :         case alg_add_t_m2:
    4352                 :       19939 :           tmp_var
    4353                 :       19939 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
    4354                 :             :                                            stmt_vinfo, synth_shift_p);
    4355                 :       19939 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4356                 :             :                                        tmp_var);
    4357                 :       19939 :           break;
    4358                 :       15735 :         case alg_sub_t_m2:
    4359                 :       15735 :           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
    4360                 :             :                                                  shft_log, stmt_vinfo,
    4361                 :             :                                                  synth_shift_p);
    4362                 :             :           /* In some algorithms the first step involves zeroing the
    4363                 :             :              accumulator.  If subtracting from such an accumulator
    4364                 :             :              just emit the negation directly.  */
    4365                 :       15735 :           if (integer_zerop (accumulator))
    4366                 :         168 :             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
    4367                 :             :           else
    4368                 :       15567 :             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
    4369                 :             :                                         tmp_var);
    4370                 :             :           break;
    4371                 :           0 :         case alg_add_t2_m:
    4372                 :           0 :           tmp_var
    4373                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4374                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4375                 :           0 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
    4376                 :           0 :           break;
    4377                 :           0 :         case alg_sub_t2_m:
    4378                 :           0 :           tmp_var
    4379                 :           0 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4380                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4381                 :           0 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
    4382                 :           0 :           break;
    4383                 :        4505 :         case alg_add_factor:
    4384                 :        4505 :           tmp_var
    4385                 :        4505 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4386                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4387                 :        4505 :           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
    4388                 :             :                                        tmp_var);
    4389                 :        4505 :           break;
    4390                 :         610 :         case alg_sub_factor:
    4391                 :         610 :           tmp_var
    4392                 :         610 :             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
    4393                 :             :                                            shft_log, stmt_vinfo, synth_shift_p);
    4394                 :         610 :           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
    4395                 :             :                                       accumulator);
    4396                 :         610 :           break;
    4397                 :           0 :         default:
    4398                 :           0 :           gcc_unreachable ();
    4399                 :             :         }
    4400                 :             :       /* We don't want to append the last stmt in the sequence to stmt_vinfo
    4401                 :             :          but rather return it directly.  */
    4402                 :             : 
    4403                 :       91587 :       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
    4404                 :       79209 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4405                 :       91587 :       accumulator = accum_tmp;
    4406                 :             :     }
    4407                 :       23306 :   if (variant == negate_variant)
    4408                 :             :     {
    4409                 :         317 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4410                 :         317 :       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
    4411                 :         317 :       accumulator = accum_tmp;
    4412                 :         317 :       if (cast_to_unsigned_p)
    4413                 :         121 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4414                 :             :     }
    4415                 :       22989 :   else if (variant == add_variant)
    4416                 :             :     {
    4417                 :          78 :       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
    4418                 :          78 :       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
    4419                 :          78 :       accumulator = accum_tmp;
    4420                 :          78 :       if (cast_to_unsigned_p)
    4421                 :          68 :         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    4422                 :             :     }
    4423                 :             :   /* Convert the result back to ITYPE if we computed in an unsigned type.  */
    4424                 :       23306 :   if (cast_to_unsigned_p)
    4425                 :             :     {
    4426                 :       10722 :       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
    4427                 :       10722 :       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    4428                 :             :     }
    4429                 :             : 
    4430                 :             :   return stmt;
    4431                 :             : }
    4432                 :             : 
    4433                 :             : /* Detect multiplication by a constant and convert it into a sequence of
    4434                 :             :    shifts, additions, subtractions and negations.  We reuse the
    4435                 :             :    choose_mult_variant algorithms from expmed.cc.
    4436                 :             : 
    4437                 :             :    Input/Output:
    4438                 :             : 
    4439                 :             :    * STMT_VINFO: The stmt from which the pattern search begins,
    4440                 :             :      i.e. the mult stmt.
    4441                 :             : 
    4442                 :             :    Output:
    4443                 :             : 
    4444                 :             :    * TYPE_OUT: The vector type of the output of this pattern.
    4445                 :             : 
    4446                 :             :    * Return value: A new stmt that will be used to replace
    4447                 :             :      the multiplication, or NULL if the pattern does not apply.  */
    4448                 :             : 
    4449                 :             : static gimple *
    4450                 :    25035648 : vect_recog_mult_pattern (vec_info *vinfo,
    4451                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    4452                 :             : {
    4453                 :    25035648 :   gimple *last_stmt = stmt_vinfo->stmt;
    4454                 :    25035648 :   tree oprnd0, oprnd1, vectype, itype;
    4455                 :    25035648 :   gimple *pattern_stmt;
    4456                 :             : 
    4457                 :    25035648 :   if (!is_gimple_assign (last_stmt))
    4458                 :             :     return NULL;
    4459                 :             : 
    4460                 :    17223116 :   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    4461                 :             :     return NULL;
    4462                 :             : 
    4463                 :     1076534 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4464                 :     1076534 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4465                 :     1076534 :   itype = TREE_TYPE (oprnd0);
    4466                 :             : 
    4467                 :     1076534 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4468                 :     1075542 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4469                 :      619455 :       || !INTEGRAL_TYPE_P (itype)
    4470                 :     1695989 :       || !type_has_mode_precision_p (itype))
    4471                 :      457131 :     return NULL;
    4472                 :             : 
    4473                 :      619403 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4474                 :      619403 :   if (vectype == NULL_TREE)
    4475                 :             :     return NULL;
    4476                 :             : 
    4477                 :             :   /* If the target can handle vectorized multiplication natively,
    4478                 :             :      don't attempt to optimize this.  */
    4479                 :      510914 :   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
    4480                 :      510914 :   if (mul_optab != unknown_optab)
    4481                 :             :     {
    4482                 :      510914 :       machine_mode vec_mode = TYPE_MODE (vectype);
    4483                 :      510914 :       int icode = (int) optab_handler (mul_optab, vec_mode);
    4484                 :      510914 :       if (icode != CODE_FOR_nothing)
    4485                 :             :        return NULL;
    4486                 :             :     }
    4487                 :             : 
    4488                 :      199919 :   pattern_stmt = vect_synth_mult_by_constant (vinfo,
    4489                 :             :                                               oprnd0, oprnd1, stmt_vinfo);
    4490                 :      199919 :   if (!pattern_stmt)
    4491                 :             :     return NULL;
    4492                 :             : 
    4493                 :             :   /* Pattern detected.  */
    4494                 :       23306 :   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
    4495                 :             : 
    4496                 :       23306 :   *type_out = vectype;
    4497                 :             : 
    4498                 :       23306 :   return pattern_stmt;
    4499                 :             : }
    4500                 :             : 
    4501                 :             : /* Detect a signed division by a constant that wouldn't be
    4502                 :             :    otherwise vectorized:
    4503                 :             : 
    4504                 :             :    type a_t, b_t;
    4505                 :             : 
    4506                 :             :    S1 a_t = b_t / N;
    4507                 :             : 
    4508                 :             :   where type 'type' is an integral type and N is a constant.
    4509                 :             : 
    4510                 :             :   Similarly handle modulo by a constant:
    4511                 :             : 
    4512                 :             :    S4 a_t = b_t % N;
    4513                 :             : 
    4514                 :             :   Input/Output:
    4515                 :             : 
    4516                 :             :   * STMT_VINFO: The stmt from which the pattern search begins,
    4517                 :             :     i.e. the division stmt.  If N is a power of two constant and
    4518                 :             :     type is signed, S1 is replaced by:
    4519                 :             :   S3  y_t = b_t < 0 ? N - 1 : 0;
    4520                 :             :   S2  x_t = b_t + y_t;
    4521                 :             :   S1' a_t = x_t >> log2 (N);
    4522                 :             : 
    4523                 :             :     If N is a power of two constant and type is signed, S4 is
    4524                 :             :     replaced by (where *_T temporaries have unsigned type):
    4525                 :             :   S9  y_T = b_t < 0 ? -1U : 0U;
    4526                 :             :   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    4527                 :             :   S7  z_t = (type) z_T;
    4528                 :             :   S6  w_t = b_t + z_t;
    4529                 :             :   S5  x_t = w_t & (N - 1);
    4530                 :             :   S4' a_t = x_t - z_t;
    4531                 :             : 
    4532                 :             :   Output:
    4533                 :             : 
    4534                 :             :   * TYPE_OUT: The type of the output of this pattern.
    4535                 :             : 
    4536                 :             :   * Return value: A new stmt that will be used to replace the division
    4537                 :             :     S1 or modulo S4 stmt.  */
    4538                 :             : 
    4539                 :             : static gimple *
    4540                 :    24965617 : vect_recog_divmod_pattern (vec_info *vinfo,
    4541                 :             :                            stmt_vec_info stmt_vinfo, tree *type_out)
    4542                 :             : {
    4543                 :    24965617 :   gimple *last_stmt = stmt_vinfo->stmt;
    4544                 :    24965617 :   tree oprnd0, oprnd1, vectype, itype, cond;
    4545                 :    24965617 :   gimple *pattern_stmt, *def_stmt;
    4546                 :    24965617 :   enum tree_code rhs_code;
    4547                 :    24965617 :   optab optab;
    4548                 :    24965617 :   tree q, cst;
    4549                 :    24965617 :   int dummy_int, prec;
    4550                 :             : 
    4551                 :    24965617 :   if (!is_gimple_assign (last_stmt))
    4552                 :             :     return NULL;
    4553                 :             : 
    4554                 :    17153085 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    4555                 :    17153085 :   switch (rhs_code)
    4556                 :             :     {
    4557                 :      220927 :     case TRUNC_DIV_EXPR:
    4558                 :      220927 :     case EXACT_DIV_EXPR:
    4559                 :      220927 :     case TRUNC_MOD_EXPR:
    4560                 :      220927 :       break;
    4561                 :             :     default:
    4562                 :             :       return NULL;
    4563                 :             :     }
    4564                 :             : 
    4565                 :      220927 :   oprnd0 = gimple_assign_rhs1 (last_stmt);
    4566                 :      220927 :   oprnd1 = gimple_assign_rhs2 (last_stmt);
    4567                 :      220927 :   itype = TREE_TYPE (oprnd0);
    4568                 :      220927 :   if (TREE_CODE (oprnd0) != SSA_NAME
    4569                 :      205153 :       || TREE_CODE (oprnd1) != INTEGER_CST
    4570                 :      116151 :       || TREE_CODE (itype) != INTEGER_TYPE
    4571                 :      337078 :       || !type_has_mode_precision_p (itype))
    4572                 :      104776 :     return NULL;
    4573                 :             : 
    4574                 :      116151 :   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
    4575                 :      116151 :   vectype = get_vectype_for_scalar_type (vinfo, itype);
    4576                 :      116151 :   if (vectype == NULL_TREE)
    4577                 :             :     return NULL;
    4578                 :             : 
    4579                 :       96321 :   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    4580                 :             :     {
    4581                 :             :       /* If the target can handle vectorized division or modulo natively,
    4582                 :             :          don't attempt to optimize this, since native division is likely
    4583                 :             :          to give smaller code.  */
    4584                 :        1173 :       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
    4585                 :        1173 :       if (optab != unknown_optab)
    4586                 :             :         {
    4587                 :        1173 :           machine_mode vec_mode = TYPE_MODE (vectype);
    4588                 :        1173 :           int icode = (int) optab_handler (optab, vec_mode);
    4589                 :        1173 :           if (icode != CODE_FOR_nothing)
    4590                 :             :             return NULL;
    4591                 :             :         }
    4592                 :             :     }
    4593                 :             : 
    4594                 :       96321 :   prec = TYPE_PRECISION (itype);
    4595                 :       96321 :   if (integer_pow2p (oprnd1))
    4596                 :             :     {
    4597                 :       44903 :       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
    4598                 :        3180 :         return NULL;
    4599                 :             : 
    4600                 :             :       /* Pattern detected.  */
    4601                 :       41723 :       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    4602                 :             : 
    4603                 :       41723 :       *type_out = vectype;
    4604                 :             : 
    4605                 :             :       /* Check if the target supports this internal function.  */
    4606                 :       41723 :       internal_fn ifn = IFN_DIV_POW2;
    4607                 :       41723 :       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
    4608                 :             :         {
    4609                 :           0 :           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
    4610                 :             : 
    4611                 :           0 :           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
    4612                 :           0 :           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
    4613                 :           0 :           gimple_call_set_lhs (div_stmt, var_div);
    4614                 :             : 
    4615                 :           0 :           if (rhs_code == TRUNC_MOD_EXPR)
    4616                 :             :             {
    4617                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
    4618                 :           0 :               def_stmt
    4619                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4620                 :             :                                        LSHIFT_EXPR, var_div, shift);
    4621                 :           0 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4622                 :           0 :               pattern_stmt
    4623                 :           0 :                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4624                 :             :                                        MINUS_EXPR, oprnd0,
    4625                 :             :                                        gimple_assign_lhs (def_stmt));
    4626                 :             :             }
    4627                 :             :           else
    4628                 :             :             pattern_stmt = div_stmt;
    4629                 :           0 :           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
    4630                 :             : 
    4631                 :           0 :           return pattern_stmt;
    4632                 :             :         }
    4633                 :             : 
    4634                 :       41723 :       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
    4635                 :       41723 :                      build_int_cst (itype, 0));
    4636                 :       41723 :       if (rhs_code == TRUNC_DIV_EXPR
    4637                 :       41723 :           || rhs_code == EXACT_DIV_EXPR)
    4638                 :             :         {
    4639                 :       39803 :           tree var = vect_recog_temp_ssa_var (itype, NULL);
    4640                 :       39803 :           tree shift;
    4641                 :       39803 :           def_stmt
    4642                 :       39803 :             = gimple_build_assign (var, COND_EXPR, cond,
    4643                 :       39803 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4644                 :             :                                                 build_int_cst (itype, 1)),
    4645                 :       39803 :                                    build_int_cst (itype, 0));
    4646                 :       39803 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4647                 :       39803 :           var = vect_recog_temp_ssa_var (itype, NULL);
    4648                 :       39803 :           def_stmt
    4649                 :       39803 :             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
    4650                 :             :                                    gimple_assign_lhs (def_stmt));
    4651                 :       39803 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4652                 :             : 
    4653                 :       39803 :           shift = build_int_cst (itype, tree_log2 (oprnd1));
    4654                 :       39803 :           pattern_stmt
    4655                 :       39803 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4656                 :             :                                    RSHIFT_EXPR, var, shift);
    4657                 :             :         }
    4658                 :             :       else
    4659                 :             :         {
    4660                 :        1920 :           tree signmask;
    4661                 :        1920 :           if (compare_tree_int (oprnd1, 2) == 0)
    4662                 :             :             {
    4663                 :         777 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4664                 :         777 :               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
    4665                 :         777 :                                               build_int_cst (itype, 1),
    4666                 :         777 :                                               build_int_cst (itype, 0));
    4667                 :         777 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4668                 :             :             }
    4669                 :             :           else
    4670                 :             :             {
    4671                 :        1143 :               tree utype
    4672                 :        1143 :                 = build_nonstandard_integer_type (prec, 1);
    4673                 :        1143 :               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
    4674                 :        1143 :               tree shift
    4675                 :        1143 :                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
    4676                 :        1143 :                                         - tree_log2 (oprnd1));
    4677                 :        1143 :               tree var = vect_recog_temp_ssa_var (utype, NULL);
    4678                 :             : 
    4679                 :        1143 :               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
    4680                 :        1143 :                                               build_int_cst (utype, -1),
    4681                 :        1143 :                                               build_int_cst (utype, 0));
    4682                 :        1143 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4683                 :        1143 :               var = vect_recog_temp_ssa_var (utype, NULL);
    4684                 :        1143 :               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
    4685                 :             :                                               gimple_assign_lhs (def_stmt),
    4686                 :             :                                               shift);
    4687                 :        1143 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
    4688                 :        1143 :               signmask = vect_recog_temp_ssa_var (itype, NULL);
    4689                 :        1143 :               def_stmt
    4690                 :        1143 :                 = gimple_build_assign (signmask, NOP_EXPR, var);
    4691                 :        1143 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4692                 :             :             }
    4693                 :        1920 :           def_stmt
    4694                 :        1920 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4695                 :             :                                    PLUS_EXPR, oprnd0, signmask);
    4696                 :        1920 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4697                 :        1920 :           def_stmt
    4698                 :        1920 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4699                 :             :                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
    4700                 :        1920 :                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
    4701                 :             :                                                 build_int_cst (itype, 1)));
    4702                 :        1920 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4703                 :             : 
    4704                 :        1920 :           pattern_stmt
    4705                 :        1920 :             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    4706                 :             :                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
    4707                 :             :                                    signmask);
    4708                 :             :         }
    4709                 :             : 
    4710                 :       41723 :       return pattern_stmt;
    4711                 :             :     }
    4712                 :             : 
    4713                 :       51418 :   if ((cst = uniform_integer_cst_p (oprnd1))
    4714                 :       51418 :       && TYPE_UNSIGNED (itype)
    4715                 :             :       && rhs_code == TRUNC_DIV_EXPR
    4716                 :       29879 :       && vectype
    4717                 :       70883 :       && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    4718                 :             :     {
    4719                 :             :       /* We can use the relationship:
    4720                 :             : 
    4721                 :             :            x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)
    4722                 :             : 
    4723                 :             :          to optimize cases where N+1 is a power of 2, and where // (N+1)
    4724                 :             :          is therefore a shift right.  When operating in modes that are
    4725                 :             :          multiples of a byte in size, there are two cases:
    4726                 :             : 
    4727                 :             :          (1) N(N+3) is not representable, in which case the question
    4728                 :             :              becomes whether the replacement expression overflows.
    4729                 :             :              It is enough to test that x+N+2 does not overflow,
    4730                 :             :              i.e. that x < MAX-(N+1).
    4731                 :             : 
    4732                 :             :          (2) N(N+3) is representable, in which case it is the (only)
    4733                 :             :              bound that we need to check.
    4734                 :             : 
    4735                 :             :          ??? For now we just handle the case where // (N+1) is a shift
    4736                 :             :          right by half the precision, since some architectures can
    4737                 :             :          optimize the associated addition and shift combinations
    4738                 :             :          into single instructions.  */
    4739                 :             : 
    4740                 :       12727 :       auto wcst = wi::to_wide (cst);
    4741                 :       12727 :       int pow = wi::exact_log2 (wcst + 1);
    4742                 :       12727 :       if (pow == prec / 2)
    4743                 :             :         {
    4744                 :         307 :           gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
    4745                 :             : 
    4746                 :         307 :           gimple_ranger ranger;
    4747                 :         307 :           int_range_max r;
    4748                 :             : 
    4749                 :             :           /* Check that no overflow will occur.  If we don't have range
    4750                 :             :              information we can't perform the optimization.  */
    4751                 :             : 
    4752                 :         307 :           if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
    4753                 :             :             {
    4754                 :         305 :               wide_int max = r.upper_bound ();
    4755                 :         305 :               wide_int one = wi::shwi (1, prec);
    4756                 :         305 :               wide_int adder = wi::add (one, wi::lshift (one, pow));
    4757                 :         305 :               wi::overflow_type ovf;
    4758                 :         305 :               wi::add (max, adder, UNSIGNED, &ovf);
    4759                 :         305 :               if (ovf == wi::OVF_NONE)
    4760                 :             :                 {
    4761                 :         248 :                   *type_out = vectype;
    4762                 :         248 :                   tree tadder = wide_int_to_tree (itype, adder);
    4763                 :         248 :                   tree rshift = wide_int_to_tree (itype, pow);
    4764                 :             : 
    4765                 :         248 :                   tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
    4766                 :         248 :                   gassign *patt1
    4767                 :         248 :                     = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
    4768                 :         248 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    4769                 :             : 
    4770                 :         248 :                   tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
    4771                 :         248 :                   patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
    4772                 :             :                                                rshift);
    4773                 :         248 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    4774                 :             : 
    4775                 :         248 :                   tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
    4776                 :         248 :                   patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
    4777                 :             :                                                oprnd0);
    4778                 :         248 :                   append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
    4779                 :             : 
    4780                 :         248 :                   tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
    4781                 :         248 :                   pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
    4782                 :             :                                                       new_lhs3, rshift);
    4783                 :             : 
    4784                 :         248 :                   return pattern_stmt;
    4785                 :             :                 }
    4786                 :         305 :             }
    4787                 :         307 :         }
    4788                 :             :     }
    4789                 :             : 
    4790                 :       51170 :   if (prec > HOST_BITS_PER_WIDE_INT
    4791                 :       51170 :       || integer_zerop (oprnd1))
    4792                 :         451 :     return NULL;
    4793                 :             : 
    4794                 :       50719 :   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    4795                 :             :     return NULL;
    4796                 :             : 
    4797                 :       14251 :   if (TYPE_UNSIGNED (itype))
    4798                 :             :     {
    4799                 :        9446 :       unsigned HOST_WIDE_INT mh, ml;
    4800                 :        9446 :       int pre_shift, post_shift;
    4801                 :        9446 :       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
    4802                 :        9446 :                                   & GET_MODE_MASK (itype_mode));
    4803                 :        9446 :       tree t1, t2, t3, t4;
    4804                 :             : 
    4805                 :        9446 :       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
    4806                 :             :         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
    4807                 :          30 :         return NULL;
    4808                 :             : 
    4809                 :             :       /* Find a suitable multiplier and right shift count
    4810                 :             :          instead of multiplying with D.  */
    4811                 :        9416 :       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
    4812                 :             : 
    4813                 :             :       /* If the suggested multiplier is more than SIZE bits, we can do better
    4814                 :             :          for even divisors, using an initial right shift.  */
    4815                 :        9416 :       if (mh != 0 && (d & 1) == 0)
    4816                 :             :         {
    4817                 :         248 :           pre_shift = ctz_or_zero (d);
    4818                 :         248 :           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
    4819                 :             :                                   &ml, &post_shift, &dummy_int);
    4820                 :         248 :           gcc_assert (!mh);
    4821                 :             :         }
    4822                 :             :       else
    4823                 :             :         pre_shift = 0;
    4824                 :             : 
    4825                 :         609 :       if (mh != 0)
    4826                 :             :         {
    4827                 :         609 :           if (post_shift - 1 >= prec)
    4828                 :             :             return NULL;
    4829                 :             : 
    4830                 :             :           /* t1 = oprnd0 h* ml;
    4831                 :             :              t2 = oprnd0 - t1;
    4832                 :             :              t3 = t2 >> 1;
    4833                 :             :              t4 = t1 + t3;
    4834                 :             :              q = t4 >> (post_shift - 1);  */
    4835                 :         609 :           t1 = vect_recog_temp_ssa_var (itype, NULL);
    4836                 :         609 :           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    4837                 :             :                                           build_int_cst (itype, ml));
    4838                 :         609 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4839                 :             : 
    4840                 :         609 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    4841                 :         609 :           def_stmt
    4842                 :         609 :             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
    4843                 :         609 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4844                 :             : 
    4845                 :         609 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    4846                 :         609 :           def_stmt
    4847                 :         609 :             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
    4848                 :         609 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4849                 :             : 
    4850                 :         609 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    4851                 :         609 :           def_stmt
    4852                 :         609 :             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
    4853                 :             : 
    4854                 :         609 :           if (post_shift != 1)
    4855                 :             :             {
    4856                 :         609 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4857                 :             : 
    4858                 :         609 :               q = vect_recog_temp_ssa_var (itype, NULL);
    4859                 :         609 :               pattern_stmt
    4860                 :         609 :                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
    4861                 :         609 :                                        build_int_cst (itype, post_shift - 1));
    4862                 :             :             }
    4863                 :             :           else
    4864                 :             :             {
    4865                 :             :               q = t4;
    4866                 :             :               pattern_stmt = def_stmt;
    4867                 :             :             }
    4868                 :             :         }
    4869                 :             :       else
    4870                 :             :         {
    4871                 :        8807 :           if (pre_shift >= prec || post_shift >= prec)
    4872                 :             :             return NULL;
    4873                 :             : 
    4874                 :             :           /* t1 = oprnd0 >> pre_shift;
    4875                 :             :              t2 = t1 h* ml;
    4876                 :             :              q = t2 >> post_shift;  */
    4877                 :        8807 :           if (pre_shift)
    4878                 :             :             {
    4879                 :         248 :               t1 = vect_recog_temp_ssa_var (itype, NULL);
    4880                 :         248 :               def_stmt
    4881                 :         248 :                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
    4882                 :             :                                        build_int_cst (NULL, pre_shift));
    4883                 :         248 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4884                 :             :             }
    4885                 :             :           else
    4886                 :             :             t1 = oprnd0;
    4887                 :             : 
    4888                 :        8807 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    4889                 :        8807 :           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
    4890                 :             :                                           build_int_cst (itype, ml));
    4891                 :             : 
    4892                 :        8807 :           if (post_shift)
    4893                 :             :             {
    4894                 :        8801 :               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4895                 :             : 
    4896                 :        8801 :               q = vect_recog_temp_ssa_var (itype, NULL);
    4897                 :        8801 :               def_stmt
    4898                 :        8801 :                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
    4899                 :             :                                        build_int_cst (itype, post_shift));
    4900                 :             :             }
    4901                 :             :           else
    4902                 :             :             q = t2;
    4903                 :             : 
    4904                 :             :           pattern_stmt = def_stmt;
    4905                 :             :         }
    4906                 :             :     }
    4907                 :             :   else
    4908                 :             :     {
    4909                 :        4805 :       unsigned HOST_WIDE_INT ml;
    4910                 :        4805 :       int post_shift;
    4911                 :        4805 :       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
    4912                 :        4805 :       unsigned HOST_WIDE_INT abs_d;
    4913                 :        4805 :       bool add = false;
    4914                 :        4805 :       tree t1, t2, t3, t4;
    4915                 :             : 
    4916                 :             :       /* Give up for -1.  */
    4917                 :        4805 :       if (d == -1)
    4918                 :           0 :         return NULL;
    4919                 :             : 
    4920                 :             :       /* Since d might be INT_MIN, we have to cast to
    4921                 :             :          unsigned HOST_WIDE_INT before negating to avoid
    4922                 :             :          undefined signed overflow.  */
    4923                 :        9610 :       abs_d = (d >= 0
    4924                 :        4805 :                ? (unsigned HOST_WIDE_INT) d
    4925                 :         157 :                : - (unsigned HOST_WIDE_INT) d);
    4926                 :             : 
    4927                 :             :       /* n rem d = n rem -d */
    4928                 :        4805 :       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
    4929                 :             :         {
    4930                 :           0 :           d = abs_d;
    4931                 :           0 :           oprnd1 = build_int_cst (itype, abs_d);
    4932                 :             :         }
    4933                 :        4805 :       if (HOST_BITS_PER_WIDE_INT >= prec
    4934                 :        4805 :           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
    4935                 :             :         /* This case is not handled correctly below.  */
    4936                 :             :         return NULL;
    4937                 :             : 
    4938                 :        4805 :       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
    4939                 :        4805 :       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
    4940                 :             :         {
    4941                 :        1420 :           add = true;
    4942                 :        1420 :           ml |= HOST_WIDE_INT_M1U << (prec - 1);
    4943                 :             :         }
    4944                 :        4805 :       if (post_shift >= prec)
    4945                 :             :         return NULL;
    4946                 :             : 
    4947                 :             :       /* t1 = oprnd0 h* ml;  */
    4948                 :        4805 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    4949                 :        4805 :       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
    4950                 :             :                                       build_int_cst (itype, ml));
    4951                 :             : 
    4952                 :        4805 :       if (add)
    4953                 :             :         {
    4954                 :             :           /* t2 = t1 + oprnd0;  */
    4955                 :        1420 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4956                 :        1420 :           t2 = vect_recog_temp_ssa_var (itype, NULL);
    4957                 :        1420 :           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
    4958                 :             :         }
    4959                 :             :       else
    4960                 :             :         t2 = t1;
    4961                 :             : 
    4962                 :        4805 :       if (post_shift)
    4963                 :             :         {
    4964                 :             :           /* t3 = t2 >> post_shift;  */
    4965                 :        4180 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4966                 :        4180 :           t3 = vect_recog_temp_ssa_var (itype, NULL);
    4967                 :        4180 :           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
    4968                 :             :                                           build_int_cst (itype, post_shift));
    4969                 :             :         }
    4970                 :             :       else
    4971                 :             :         t3 = t2;
    4972                 :             : 
    4973                 :        4805 :       int msb = 1;
    4974                 :        4805 :       value_range r;
    4975                 :        9610 :       get_range_query (cfun)->range_of_expr (r, oprnd0);
    4976                 :        4805 :       if (!r.varying_p () && !r.undefined_p ())
    4977                 :             :         {
    4978                 :        2792 :           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
    4979                 :             :             msb = 0;
    4980                 :         626 :           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
    4981                 :             :             msb = -1;
    4982                 :             :         }
    4983                 :             : 
    4984                 :        2166 :       if (msb == 0 && d >= 0)
    4985                 :             :         {
    4986                 :             :           /* q = t3;  */
    4987                 :             :           q = t3;
    4988                 :             :           pattern_stmt = def_stmt;
    4989                 :             :         }
    4990                 :             :       else
    4991                 :             :         {
    4992                 :             :           /* t4 = oprnd0 >> (prec - 1);
    4993                 :             :              or if we know from VRP that oprnd0 >= 0
    4994                 :             :              t4 = 0;
    4995                 :             :              or if we know from VRP that oprnd0 < 0
    4996                 :             :              t4 = -1;  */
    4997                 :        2705 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    4998                 :        2705 :           t4 = vect_recog_temp_ssa_var (itype, NULL);
    4999                 :        2705 :           if (msb != 1)
    5000                 :          74 :             def_stmt = gimple_build_assign (t4, INTEGER_CST,
    5001                 :             :                                             build_int_cst (itype, msb));
    5002                 :             :           else
    5003                 :        2631 :             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
    5004                 :        2631 :                                             build_int_cst (itype, prec - 1));
    5005                 :        2705 :           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5006                 :             : 
    5007                 :             :           /* q = t3 - t4;  or q = t4 - t3;  */
    5008                 :        2705 :           q = vect_recog_temp_ssa_var (itype, NULL);
    5009                 :        5253 :           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
    5010                 :             :                                               d < 0 ? t3 : t4);
    5011                 :             :         }
    5012                 :        4805 :     }
    5013                 :             : 
    5014                 :       14221 :   if (rhs_code == TRUNC_MOD_EXPR)
    5015                 :             :     {
    5016                 :        5856 :       tree r, t1;
    5017                 :             : 
    5018                 :             :       /* We divided.  Now finish by:
    5019                 :             :          t1 = q * oprnd1;
    5020                 :             :          r = oprnd0 - t1;  */
    5021                 :        5856 :       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
    5022                 :             : 
    5023                 :        5856 :       t1 = vect_recog_temp_ssa_var (itype, NULL);
    5024                 :        5856 :       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
    5025                 :        5856 :       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    5026                 :             : 
    5027                 :        5856 :       r = vect_recog_temp_ssa_var (itype, NULL);
    5028                 :        5856 :       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    5029                 :             :     }
    5030                 :             : 
    5031                 :             :   /* Pattern detected.  */
    5032                 :       14221 :   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
    5033                 :             : 
    5034                 :       14221 :   *type_out = vectype;
    5035                 :       14221 :   return pattern_stmt;
    5036                 :             : }
    5037                 :             : 
    5038                 :             : /* Function vect_recog_mixed_size_cond_pattern
    5039                 :             : 
    5040                 :             :    Try to find the following pattern:
    5041                 :             : 
    5042                 :             :      type x_t, y_t;
    5043                 :             :      TYPE a_T, b_T, c_T;
    5044                 :             :    loop:
    5045                 :             :      S1  a_T = x_t CMP y_t ? b_T : c_T;
    5046                 :             : 
    5047                 :             :    where type 'TYPE' is an integral type which has different size
    5048                 :             :    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
    5049                 :             :    than 'type', the constants need to fit into an integer type
    5050                 :             :    with the same width as 'type') or results of conversion from 'type'.
    5051                 :             : 
    5052                 :             :    Input:
    5053                 :             : 
    5054                 :             :    * STMT_VINFO: The stmt from which the pattern search begins.
    5055                 :             : 
    5056                 :             :    Output:
    5057                 :             : 
    5058                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5059                 :             : 
    5060                 :             :    * Return value: A new stmt that will be used to replace the pattern.
    5061                 :             :         Additionally a def_stmt is added.
    5062                 :             : 
    5063                 :             :         a_it = x_t CMP y_t ? b_it : c_it;
    5064                 :             :         a_T = (TYPE) a_it;  */
    5065                 :             : 
    5066                 :             : static gimple *
    5067                 :    25109271 : vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
    5068                 :             :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    5069                 :             : {
    5070                 :    25109271 :   gimple *last_stmt = stmt_vinfo->stmt;
    5071                 :    25109271 :   tree cond_expr, then_clause, else_clause;
    5072                 :    25109271 :   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
    5073                 :    25109271 :   gimple *pattern_stmt, *def_stmt;
    5074                 :    25109271 :   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
    5075                 :    25109271 :   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
    5076                 :    25109271 :   bool promotion;
    5077                 :    25109271 :   tree comp_scalar_type;
    5078                 :             : 
    5079                 :    25109271 :   if (!is_gimple_assign (last_stmt)
    5080                 :    17296739 :       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
    5081                 :    25219428 :       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    5082                 :             :     return NULL;
    5083                 :             : 
    5084                 :      108392 :   cond_expr = gimple_assign_rhs1 (last_stmt);
    5085                 :      108392 :   then_clause = gimple_assign_rhs2 (last_stmt);
    5086                 :      108392 :   else_clause = gimple_assign_rhs3 (last_stmt);
    5087                 :             : 
    5088                 :      108392 :   if (!COMPARISON_CLASS_P (cond_expr))
    5089                 :             :     return NULL;
    5090                 :             : 
    5091                 :       41723 :   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
    5092                 :       41723 :   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
    5093                 :       41723 :   if (comp_vectype == NULL_TREE)
    5094                 :             :     return NULL;
    5095                 :             : 
    5096                 :       41723 :   type = TREE_TYPE (gimple_assign_lhs (last_stmt));
    5097                 :       41723 :   if (types_compatible_p (type, comp_scalar_type)
    5098                 :        1143 :       || ((TREE_CODE (then_clause) != INTEGER_CST
    5099                 :        1143 :            || TREE_CODE (else_clause) != INTEGER_CST)
    5100                 :           0 :           && !INTEGRAL_TYPE_P (comp_scalar_type))
    5101                 :       42866 :       || !INTEGRAL_TYPE_P (type))
    5102                 :             :     return NULL;
    5103                 :             : 
    5104                 :        1143 :   if ((TREE_CODE (then_clause) != INTEGER_CST
    5105                 :           0 :        && !type_conversion_p (vinfo, then_clause, false,
    5106                 :             :                               &orig_type0, &def_stmt0, &promotion))
    5107                 :        1143 :       || (TREE_CODE (else_clause) != INTEGER_CST
    5108                 :           0 :           && !type_conversion_p (vinfo, else_clause, false,
    5109                 :             :                                  &orig_type1, &def_stmt1, &promotion)))
    5110                 :           0 :     return NULL;
    5111                 :             : 
    5112                 :           0 :   if (orig_type0 && orig_type1
    5113                 :        1143 :       && !types_compatible_p (orig_type0, orig_type1))
    5114                 :             :     return NULL;
    5115                 :             : 
    5116                 :        1143 :   if (orig_type0)
    5117                 :             :     {
    5118                 :           0 :       if (!types_compatible_p (orig_type0, comp_scalar_type))
    5119                 :             :         return NULL;
    5120                 :           0 :       then_clause = gimple_assign_rhs1 (def_stmt0);
    5121                 :           0 :       itype = orig_type0;
    5122                 :             :     }
    5123                 :             : 
    5124                 :        1143 :   if (orig_type1)
    5125                 :             :     {
    5126                 :           0 :       if (!types_compatible_p (orig_type1, comp_scalar_type))
    5127                 :             :         return NULL;
    5128                 :           0 :       else_clause = gimple_assign_rhs1 (def_stmt1);
    5129                 :           0 :       itype = orig_type1;
    5130                 :             :     }
    5131                 :             : 
    5132                 :             : 
    5133                 :        1143 :   HOST_WIDE_INT cmp_mode_size
    5134                 :        1143 :     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
    5135                 :             : 
    5136                 :        1143 :   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
    5137                 :        2286 :   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    5138                 :             :     return NULL;
    5139                 :             : 
    5140                 :           0 :   vectype = get_vectype_for_scalar_type (vinfo, type);
    5141                 :           0 :   if (vectype == NULL_TREE)
    5142                 :             :     return NULL;
    5143                 :             : 
    5144                 :           0 :   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    5145                 :             :     return NULL;
    5146                 :             : 
    5147                 :           0 :   if (itype == NULL_TREE)
    5148                 :           0 :     itype = build_nonstandard_integer_type (cmp_mode_size,
    5149                 :           0 :                                             TYPE_UNSIGNED (type));
    5150                 :             : 
    5151                 :           0 :   if (itype == NULL_TREE
    5152                 :           0 :       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    5153                 :           0 :     return NULL;
    5154                 :             : 
    5155                 :           0 :   vecitype = get_vectype_for_scalar_type (vinfo, itype);
    5156                 :           0 :   if (vecitype == NULL_TREE)
    5157                 :             :     return NULL;
    5158                 :             : 
    5159                 :           0 :   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    5160                 :             :     return NULL;
    5161                 :             : 
    5162                 :           0 :   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    5163                 :             :     {
    5164                 :           0 :       if ((TREE_CODE (then_clause) == INTEGER_CST
    5165                 :           0 :            && !int_fits_type_p (then_clause, itype))
    5166                 :           0 :           || (TREE_CODE (else_clause) == INTEGER_CST
    5167                 :           0 :               && !int_fits_type_p (else_clause, itype)))
    5168                 :             :         return NULL;
    5169                 :             :     }
    5170                 :             : 
    5171                 :           0 :   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5172                 :             :                                   COND_EXPR, unshare_expr (cond_expr),
    5173                 :             :                                   fold_convert (itype, then_clause),
    5174                 :             :                                   fold_convert (itype, else_clause));
    5175                 :           0 :   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
    5176                 :             :                                       NOP_EXPR, gimple_assign_lhs (def_stmt));
    5177                 :             : 
    5178                 :           0 :   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
    5179                 :           0 :   *type_out = vectype;
    5180                 :             : 
    5181                 :           0 :   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
    5182                 :             : 
    5183                 :           0 :   return pattern_stmt;
    5184                 :             : }
    5185                 :             : 
    5186                 :             : 
    5187                 :             : /* Helper function of vect_recog_bool_pattern.  Called recursively, return
    5188                 :             :    true if bool VAR can and should be optimized that way.  Assume it shouldn't
    5189                 :             :    in case it's a result of a comparison which can be directly vectorized into
    5190                 :             :    a vector comparison.  Fills in STMTS with all stmts visited during the
    5191                 :             :    walk.  */
    5192                 :             : 
    5193                 :             : static bool
    5194                 :      174037 : check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
    5195                 :             : {
    5196                 :      174037 :   tree rhs1;
    5197                 :      174037 :   enum tree_code rhs_code;
    5198                 :             : 
    5199                 :      174037 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    5200                 :      174037 :   if (!def_stmt_info)
    5201                 :             :     return false;
    5202                 :             : 
    5203                 :      337985 :   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
    5204                 :      163948 :   if (!def_stmt)
    5205                 :             :     return false;
    5206                 :             : 
    5207                 :      163948 :   if (stmts.contains (def_stmt))
    5208                 :             :     return true;
    5209                 :             : 
    5210                 :      163948 :   rhs1 = gimple_assign_rhs1 (def_stmt);
    5211                 :      163948 :   rhs_code = gimple_assign_rhs_code (def_stmt);
    5212                 :      163948 :   switch (rhs_code)
    5213                 :             :     {
    5214                 :           0 :     case SSA_NAME:
    5215                 :           0 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    5216                 :             :         return false;
    5217                 :             :       break;
    5218                 :             : 
    5219                 :        2227 :     CASE_CONVERT:
    5220                 :        2227 :       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
    5221                 :             :         return false;
    5222                 :         163 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    5223                 :             :         return false;
    5224                 :             :       break;
    5225                 :             : 
    5226                 :        2762 :     case BIT_NOT_EXPR:
    5227                 :        2762 :       if (! check_bool_pattern (rhs1, vinfo, stmts))
    5228                 :             :         return false;
    5229                 :             :       break;
    5230                 :             : 
    5231                 :       20582 :     case BIT_AND_EXPR:
    5232                 :       20582 :     case BIT_IOR_EXPR:
    5233                 :       20582 :     case BIT_XOR_EXPR:
    5234                 :       20582 :       if (! check_bool_pattern (rhs1, vinfo, stmts)
    5235                 :       20582 :           || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
    5236                 :       20582 :         return false;
    5237                 :             :       break;
    5238                 :             : 
    5239                 :      138377 :     default:
    5240                 :      138377 :       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
    5241                 :             :         {
    5242                 :      124856 :           tree vecitype, comp_vectype;
    5243                 :             : 
    5244                 :             :           /* If the comparison can throw, then is_gimple_condexpr will be
    5245                 :             :              false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
    5246                 :      124856 :           if (stmt_could_throw_p (cfun, def_stmt))
    5247                 :             :             return false;
    5248                 :             : 
    5249                 :      124785 :           comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
    5250                 :      124785 :           if (comp_vectype == NULL_TREE)
    5251                 :             :             return false;
    5252                 :             : 
    5253                 :      114323 :           tree mask_type = get_mask_type_for_scalar_type (vinfo,
    5254                 :      114323 :                                                           TREE_TYPE (rhs1));
    5255                 :      114323 :           if (mask_type
    5256                 :      114323 :               && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
    5257                 :             :             return false;
    5258                 :             : 
    5259                 :       36261 :           if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
    5260                 :             :             {
    5261                 :        5163 :               scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
    5262                 :        5163 :               tree itype
    5263                 :        5163 :                 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
    5264                 :        5163 :               vecitype = get_vectype_for_scalar_type (vinfo, itype);
    5265                 :        5163 :               if (vecitype == NULL_TREE)
    5266                 :           0 :                 return false;
    5267                 :             :             }
    5268                 :             :           else
    5269                 :             :             vecitype = comp_vectype;
    5270                 :       36261 :           if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
    5271                 :             :             return false;
    5272                 :             :         }
    5273                 :             :       else
    5274                 :             :         return false;
    5275                 :             :       break;
    5276                 :             :     }
    5277                 :             : 
    5278                 :           0 :   bool res = stmts.add (def_stmt);
    5279                 :             :   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
    5280                 :           0 :   gcc_assert (!res);
    5281                 :             : 
    5282                 :             :   return true;
    5283                 :             : }
    5284                 :             : 
    5285                 :             : 
    5286                 :             : /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
    5287                 :             :    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
    5288                 :             :    pattern sequence.  */
    5289                 :             : 
    5290                 :             : static tree
    5291                 :           0 : adjust_bool_pattern_cast (vec_info *vinfo,
    5292                 :             :                           tree type, tree var, stmt_vec_info stmt_info)
    5293                 :             : {
    5294                 :           0 :   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
    5295                 :             :                                            NOP_EXPR, var);
    5296                 :           0 :   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
    5297                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    5298                 :           0 :   return gimple_assign_lhs (cast_stmt);
    5299                 :             : }
    5300                 :             : 
    5301                 :             : /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
    5302                 :             :    VAR is an SSA_NAME that should be transformed from bool to a wider integer
    5303                 :             :    type, OUT_TYPE is the desired final integer type of the whole pattern.
    5304                 :             :    STMT_INFO is the info of the pattern root and is where pattern stmts should
    5305                 :             :    be associated with.  DEFS is a map of pattern defs.  */
    5306                 :             : 
    5307                 :             : static void
    5308                 :           0 : adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
    5309                 :             :                      stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
    5310                 :             : {
    5311                 :           0 :   gimple *stmt = SSA_NAME_DEF_STMT (var);
    5312                 :           0 :   enum tree_code rhs_code, def_rhs_code;
    5313                 :           0 :   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
    5314                 :           0 :   location_t loc;
    5315                 :           0 :   gimple *pattern_stmt, *def_stmt;
    5316                 :           0 :   tree trueval = NULL_TREE;
    5317                 :             : 
    5318                 :           0 :   rhs1 = gimple_assign_rhs1 (stmt);
    5319                 :           0 :   rhs2 = gimple_assign_rhs2 (stmt);
    5320                 :           0 :   rhs_code = gimple_assign_rhs_code (stmt);
    5321                 :           0 :   loc = gimple_location (stmt);
    5322                 :           0 :   switch (rhs_code)
    5323                 :             :     {
    5324                 :           0 :     case SSA_NAME:
    5325                 :           0 :     CASE_CONVERT:
    5326                 :           0 :       irhs1 = *defs.get (rhs1);
    5327                 :           0 :       itype = TREE_TYPE (irhs1);
    5328                 :           0 :       pattern_stmt
    5329                 :           0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5330                 :             :                                SSA_NAME, irhs1);
    5331                 :           0 :       break;
    5332                 :             : 
    5333                 :           0 :     case BIT_NOT_EXPR:
    5334                 :           0 :       irhs1 = *defs.get (rhs1);
    5335                 :           0 :       itype = TREE_TYPE (irhs1);
    5336                 :           0 :       pattern_stmt
    5337                 :           0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5338                 :           0 :                                BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
    5339                 :           0 :       break;
    5340                 :             : 
    5341                 :           0 :     case BIT_AND_EXPR:
    5342                 :             :       /* Try to optimize x = y & (a < b ? 1 : 0); into
    5343                 :             :          x = (a < b ? y : 0);
    5344                 :             : 
    5345                 :             :          E.g. for:
    5346                 :             :            bool a_b, b_b, c_b;
    5347                 :             :            TYPE d_T;
    5348                 :             : 
    5349                 :             :            S1  a_b = x1 CMP1 y1;
    5350                 :             :            S2  b_b = x2 CMP2 y2;
    5351                 :             :            S3  c_b = a_b & b_b;
    5352                 :             :            S4  d_T = (TYPE) c_b;
    5353                 :             : 
    5354                 :             :          we would normally emit:
    5355                 :             : 
    5356                 :             :            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5357                 :             :            S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5358                 :             :            S3'  c_T = a_T & b_T;
    5359                 :             :            S4'  d_T = c_T;
    5360                 :             : 
    5361                 :             :          but we can save one stmt by using the
    5362                 :             :          result of one of the COND_EXPRs in the other COND_EXPR and leave
    5363                 :             :          BIT_AND_EXPR stmt out:
    5364                 :             : 
    5365                 :             :            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5366                 :             :            S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5367                 :             :            S4'  f_T = c_T;
    5368                 :             : 
    5369                 :             :          At least when VEC_COND_EXPR is implemented using masks
    5370                 :             :          cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
    5371                 :             :          computes the comparison masks and ands it, in one case with
    5372                 :             :          all ones vector, in the other case with a vector register.
    5373                 :             :          Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
    5374                 :             :          often more expensive.  */
    5375                 :           0 :       def_stmt = SSA_NAME_DEF_STMT (rhs2);
    5376                 :           0 :       def_rhs_code = gimple_assign_rhs_code (def_stmt);
    5377                 :           0 :       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
    5378                 :             :         {
    5379                 :           0 :           irhs1 = *defs.get (rhs1);
    5380                 :           0 :           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
    5381                 :           0 :           if (TYPE_PRECISION (TREE_TYPE (irhs1))
    5382                 :           0 :               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
    5383                 :             :             {
    5384                 :           0 :               rhs_code = def_rhs_code;
    5385                 :           0 :               rhs1 = def_rhs1;
    5386                 :           0 :               rhs2 = gimple_assign_rhs2 (def_stmt);
    5387                 :           0 :               trueval = irhs1;
    5388                 :           0 :               goto do_compare;
    5389                 :             :             }
    5390                 :             :           else
    5391                 :           0 :             irhs2 = *defs.get (rhs2);
    5392                 :           0 :           goto and_ior_xor;
    5393                 :             :         }
    5394                 :           0 :       def_stmt = SSA_NAME_DEF_STMT (rhs1);
    5395                 :           0 :       def_rhs_code = gimple_assign_rhs_code (def_stmt);
    5396                 :           0 :       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
    5397                 :             :         {
    5398                 :           0 :           irhs2 = *defs.get (rhs2);
    5399                 :           0 :           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
    5400                 :           0 :           if (TYPE_PRECISION (TREE_TYPE (irhs2))
    5401                 :           0 :               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
    5402                 :             :             {
    5403                 :           0 :               rhs_code = def_rhs_code;
    5404                 :           0 :               rhs1 = def_rhs1;
    5405                 :           0 :               rhs2 = gimple_assign_rhs2 (def_stmt);
    5406                 :           0 :               trueval = irhs2;
    5407                 :           0 :               goto do_compare;
    5408                 :             :             }
    5409                 :             :           else
    5410                 :           0 :             irhs1 = *defs.get (rhs1);
    5411                 :           0 :           goto and_ior_xor;
    5412                 :             :         }
    5413                 :             :       /* FALLTHRU */
    5414                 :           0 :     case BIT_IOR_EXPR:
    5415                 :           0 :     case BIT_XOR_EXPR:
    5416                 :           0 :       irhs1 = *defs.get (rhs1);
    5417                 :           0 :       irhs2 = *defs.get (rhs2);
    5418                 :           0 :     and_ior_xor:
    5419                 :           0 :       if (TYPE_PRECISION (TREE_TYPE (irhs1))
    5420                 :           0 :           != TYPE_PRECISION (TREE_TYPE (irhs2)))
    5421                 :             :         {
    5422                 :           0 :           int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
    5423                 :           0 :           int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
    5424                 :           0 :           int out_prec = TYPE_PRECISION (out_type);
    5425                 :           0 :           if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
    5426                 :           0 :             irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
    5427                 :             :                                               stmt_info);
    5428                 :           0 :           else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
    5429                 :           0 :             irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
    5430                 :             :                                               stmt_info);
    5431                 :             :           else
    5432                 :             :             {
    5433                 :           0 :               irhs1 = adjust_bool_pattern_cast (vinfo,
    5434                 :             :                                                 out_type, irhs1, stmt_info);
    5435                 :           0 :               irhs2 = adjust_bool_pattern_cast (vinfo,
    5436                 :             :                                                 out_type, irhs2, stmt_info);
    5437                 :             :             }
    5438                 :             :         }
    5439                 :           0 :       itype = TREE_TYPE (irhs1);
    5440                 :           0 :       pattern_stmt
    5441                 :           0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5442                 :             :                                rhs_code, irhs1, irhs2);
    5443                 :           0 :       break;
    5444                 :             : 
    5445                 :           0 :     default:
    5446                 :           0 :     do_compare:
    5447                 :           0 :       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
    5448                 :           0 :       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
    5449                 :           0 :           || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
    5450                 :           0 :           || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
    5451                 :           0 :                        GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
    5452                 :             :         {
    5453                 :           0 :           scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
    5454                 :           0 :           itype
    5455                 :           0 :             = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
    5456                 :             :         }
    5457                 :             :       else
    5458                 :           0 :         itype = TREE_TYPE (rhs1);
    5459                 :           0 :       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
    5460                 :           0 :       if (trueval == NULL_TREE)
    5461                 :           0 :         trueval = build_int_cst (itype, 1);
    5462                 :             :       else
    5463                 :           0 :         gcc_checking_assert (useless_type_conversion_p (itype,
    5464                 :             :                                                         TREE_TYPE (trueval)));
    5465                 :           0 :       pattern_stmt
    5466                 :           0 :         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
    5467                 :             :                                COND_EXPR, cond_expr, trueval,
    5468                 :           0 :                                build_int_cst (itype, 0));
    5469                 :           0 :       break;
    5470                 :             :     }
    5471                 :             : 
    5472                 :           0 :   gimple_set_location (pattern_stmt, loc);
    5473                 :           0 :   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
    5474                 :             :                           get_vectype_for_scalar_type (vinfo, itype));
    5475                 :           0 :   defs.put (var, gimple_assign_lhs (pattern_stmt));
    5476                 :           0 : }
    5477                 :             : 
    5478                 :             : /* Comparison function to qsort a vector of gimple stmts by ascending UID.  */
    5479                 :             : 
    5480                 :             : static int
    5481                 :           0 : sort_after_uid (const void *p1, const void *p2)
    5482                 :             : {
    5483                 :           0 :   const gimple *stmt1 = *(const gimple * const *)p1;
    5484                 :           0 :   const gimple *stmt2 = *(const gimple * const *)p2;
    5485                 :           0 :   return gimple_uid (stmt1) - gimple_uid (stmt2);
    5486                 :             : }
    5487                 :             : 
    5488                 :             : /* Create pattern stmts for all stmts participating in the bool pattern
    5489                 :             :    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
    5490                 :             :    OUT_TYPE.  Return the def of the pattern root.  */
    5491                 :             : 
    5492                 :             : static tree
    5493                 :           0 : adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
    5494                 :             :                    tree out_type, stmt_vec_info stmt_info)
    5495                 :             : {
    5496                 :             :   /* Gather original stmts in the bool pattern in their order of appearance
    5497                 :             :      in the IL.  */
    5498                 :           0 :   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
    5499                 :           0 :   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
    5500                 :           0 :        i != bool_stmt_set.end (); ++i)
    5501                 :           0 :     bool_stmts.quick_push (*i);
    5502                 :           0 :   bool_stmts.qsort (sort_after_uid);
    5503                 :             : 
    5504                 :             :   /* Now process them in that order, producing pattern stmts.  */
    5505                 :           0 :   hash_map <tree, tree> defs;
    5506                 :           0 :   for (unsigned i = 0; i < bool_stmts.length (); ++i)
    5507                 :           0 :     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
    5508                 :             :                          out_type, stmt_info, defs);
    5509                 :             : 
    5510                 :             :   /* Return the lhs of the last pattern def seq stmt, i.e. the pattern root def.  */
    5511                 :           0 :   gimple *pattern_stmt
    5512                 :           0 :     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    5513                 :           0 :   return gimple_assign_lhs (pattern_stmt);
    5514                 :           0 : }
    5515                 :             : 
    5516                 :             : /* Return the proper type for converting bool VAR into
    5517                 :             :    an integer value or NULL_TREE if no such type exists.
    5518                 :             :    The type is chosen so that the converted value has the
    5519                 :             :    same number of elements as VAR's vector type.  */
    5520                 :             : 
    5521                 :             : static tree
    5522                 :     2680591 : integer_type_for_mask (tree var, vec_info *vinfo)
    5523                 :             : {
    5524                 :     2680591 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5525                 :             :     return NULL_TREE;
    5526                 :             : 
    5527                 :      844149 :   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
    5528                 :      844149 :   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    5529                 :             :     return NULL_TREE;
    5530                 :             : 
    5531                 :      432155 :   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
    5532                 :             : }
    5533                 :             : 
    5534                 :             : /* Function vect_recog_gcond_pattern
    5535                 :             : 
    5536                 :             :    Try to find pattern like following:
    5537                 :             : 
    5538                 :             :      if (a op b)
    5539                 :             : 
    5540                 :             :    where the condition is not already a boolean 'mask != 0', and convert it to an adjusted boolean pattern
    5541                 :             : 
    5542                 :             :      mask = a op b
    5543                 :             :      if (mask != 0)
    5544                 :             : 
    5545                 :             :    and set the mask type on MASK.
    5546                 :             : 
    5547                 :             :    Input:
    5548                 :             : 
    5549                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5550                 :             :                  search begins, i.e. the gcond holding the
    5551                 :             :                  comparison.
    5552                 :             : 
    5553                 :             :    Output:
    5554                 :             : 
    5555                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5556                 :             : 
    5557                 :             :    * Return value: A new stmt that will be used to replace the pattern.  */
    5558                 :             : 
    5559                 :             : static gimple *
    5560                 :    25109271 : vect_recog_gcond_pattern (vec_info *vinfo,
    5561                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5562                 :             : {
    5563                 :             :   /* Currently we only support this for loop vectorization and when there
    5564                 :             :      are multiple exits.  */
    5565                 :    25109271 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5566                 :     2899216 :   if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    5567                 :             :     return NULL;
    5568                 :             : 
    5569                 :      808566 :   gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
    5570                 :      808566 :   gcond* cond = NULL;
    5571                 :    25126758 :   if (!(cond = dyn_cast <gcond *> (last_stmt)))
    5572                 :             :     return NULL;
    5573                 :             : 
    5574                 :      214963 :   auto lhs = gimple_cond_lhs (cond);
    5575                 :      214963 :   auto rhs = gimple_cond_rhs (cond);
    5576                 :      214963 :   auto code = gimple_cond_code (cond);
    5577                 :             : 
    5578                 :      214963 :   tree scalar_type = TREE_TYPE (lhs);
    5579                 :      214963 :   if (VECTOR_TYPE_P (scalar_type))
    5580                 :             :     return NULL;
    5581                 :             : 
    5582                 :      214963 :   if (code == NE_EXPR
    5583                 :      126980 :       && zerop (rhs)
    5584                 :      267815 :       && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    5585                 :             :     return NULL;
    5586                 :             : 
    5587                 :      210401 :   tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
    5588                 :      210401 :   if (vecitype == NULL_TREE)
    5589                 :             :     return NULL;
    5590                 :             : 
    5591                 :      197476 :   tree vectype = truth_type_for (vecitype);
    5592                 :             : 
    5593                 :      197476 :   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
    5594                 :      197476 :   gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
    5595                 :      197476 :   append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
    5596                 :             : 
    5597                 :      197476 :   gimple *pattern_stmt
    5598                 :      197476 :     = gimple_build_cond (NE_EXPR, new_lhs,
    5599                 :      197476 :                          build_int_cst (TREE_TYPE (new_lhs), 0),
    5600                 :             :                          NULL_TREE, NULL_TREE);
    5601                 :      197476 :   *type_out = vectype;
    5602                 :      197476 :   vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
    5603                 :      197476 :   return pattern_stmt;
    5604                 :             : }
    5605                 :             : 
    5606                 :             : /* Function vect_recog_bool_pattern
    5607                 :             : 
    5608                 :             :    Try to find pattern like following:
    5609                 :             : 
    5610                 :             :      bool a_b, b_b, c_b, d_b, e_b;
    5611                 :             :      TYPE f_T;
    5612                 :             :    loop:
    5613                 :             :      S1  a_b = x1 CMP1 y1;
    5614                 :             :      S2  b_b = x2 CMP2 y2;
    5615                 :             :      S3  c_b = a_b & b_b;
    5616                 :             :      S4  d_b = x3 CMP3 y3;
    5617                 :             :      S5  e_b = c_b | d_b;
    5618                 :             :      S6  f_T = (TYPE) e_b;
    5619                 :             : 
    5620                 :             :    where type 'TYPE' is an integral type.  Or a similar pattern
    5621                 :             :    ending in
    5622                 :             : 
    5623                 :             :      S6  f_Y = e_b ? r_Y : s_Y;
    5624                 :             : 
    5625                 :             :    as results from if-conversion of a complex condition.
    5626                 :             : 
    5627                 :             :    Input:
    5628                 :             : 
    5629                 :             :    * STMT_VINFO: The stmt at the end from which the pattern
    5630                 :             :                  search begins, i.e. cast of a bool to
    5631                 :             :                  an integer type.
    5632                 :             : 
    5633                 :             :    Output:
    5634                 :             : 
    5635                 :             :    * TYPE_OUT: The type of the output of this pattern.
    5636                 :             : 
    5637                 :             :    * Return value: A new stmt that will be used to replace the pattern.
    5638                 :             : 
    5639                 :             :         Assuming size of TYPE is the same as size of all comparisons
    5640                 :             :         (otherwise some casts would be added where needed), for the above
    5641                 :             :         sequence we create related pattern stmts:
    5642                 :             :         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
    5643                 :             :         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
    5644                 :             :         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
    5645                 :             :         S5'  e_T = c_T | d_T;
    5646                 :             :         S6'  f_T = e_T;
    5647                 :             : 
    5648                 :             :         Instead of the above S3' we could emit:
    5649                 :             :         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
    5650                 :             :         S3'  c_T = a_T & b_T;
    5651                 :             :         but the above is more efficient.  */
    5652                 :             : 
    5653                 :             : static gimple *
    5654                 :    25109271 : vect_recog_bool_pattern (vec_info *vinfo,
    5655                 :             :                          stmt_vec_info stmt_vinfo, tree *type_out)
    5656                 :             : {
    5657                 :    25109271 :   gimple *last_stmt = stmt_vinfo->stmt;
    5658                 :    25109271 :   enum tree_code rhs_code;
    5659                 :    25109271 :   tree var, lhs, rhs, vectype;
    5660                 :    25109271 :   gimple *pattern_stmt;
    5661                 :             : 
    5662                 :    25109271 :   if (!is_gimple_assign (last_stmt))
    5663                 :             :     return NULL;
    5664                 :             : 
    5665                 :    17494215 :   var = gimple_assign_rhs1 (last_stmt);
    5666                 :    17494215 :   lhs = gimple_assign_lhs (last_stmt);
    5667                 :    17494215 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5668                 :             : 
    5669                 :    17494215 :   if (rhs_code == VIEW_CONVERT_EXPR)
    5670                 :      167704 :     var = TREE_OPERAND (var, 0);
    5671                 :             : 
    5672                 :    17494215 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    5673                 :             :     return NULL;
    5674                 :             : 
    5675                 :      573595 :   hash_set<gimple *> bool_stmts;
    5676                 :             : 
    5677                 :      573595 :   if (CONVERT_EXPR_CODE_P (rhs_code)
    5678                 :      490984 :       || rhs_code == VIEW_CONVERT_EXPR)
    5679                 :             :     {
    5680                 :      171113 :       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    5681                 :      170987 :           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    5682                 :             :         return NULL;
    5683                 :       82513 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5684                 :             : 
    5685                 :       82513 :       if (check_bool_pattern (var, vinfo, bool_stmts))
    5686                 :             :         {
    5687                 :           0 :           rhs = adjust_bool_stmts (vinfo, bool_stmts,
    5688                 :           0 :                                    TREE_TYPE (lhs), stmt_vinfo);
    5689                 :           0 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5690                 :           0 :           if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    5691                 :           0 :             pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    5692                 :             :           else
    5693                 :           0 :             pattern_stmt
    5694                 :           0 :               = gimple_build_assign (lhs, NOP_EXPR, rhs);
    5695                 :             :         }
    5696                 :             :       else
    5697                 :             :         {
    5698                 :       82513 :           tree type = integer_type_for_mask (var, vinfo);
    5699                 :       82513 :           tree cst0, cst1, tmp;
    5700                 :             : 
    5701                 :       82513 :           if (!type)
    5702                 :             :             return NULL;
    5703                 :             : 
    5704                 :             :           /* We may directly use cond with narrowed type to avoid
    5705                 :             :              multiple cond exprs with following result packing and
    5706                 :             :              perform single cond with packed mask instead.  In case
    5707                 :             :              of widening we better make cond first and then extract
    5708                 :             :              results.  */
    5709                 :       37637 :           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
    5710                 :       26281 :             type = TREE_TYPE (lhs);
    5711                 :             : 
    5712                 :       37637 :           cst0 = build_int_cst (type, 0);
    5713                 :       37637 :           cst1 = build_int_cst (type, 1);
    5714                 :       37637 :           tmp = vect_recog_temp_ssa_var (type, NULL);
    5715                 :       37637 :           pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
    5716                 :             : 
    5717                 :       37637 :           if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    5718                 :             :             {
    5719                 :       11356 :               tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5720                 :       11356 :               append_pattern_def_seq (vinfo, stmt_vinfo,
    5721                 :             :                                       pattern_stmt, new_vectype);
    5722                 :             : 
    5723                 :       11356 :               lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5724                 :       11356 :               pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
    5725                 :             :             }
    5726                 :             :         }
    5727                 :             : 
    5728                 :       37637 :       *type_out = vectype;
    5729                 :       37637 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5730                 :             : 
    5731                 :       37637 :       return pattern_stmt;
    5732                 :             :     }
    5733                 :      487700 :   else if (rhs_code == COND_EXPR
    5734                 :      110137 :            && TREE_CODE (var) == SSA_NAME)
    5735                 :             :     {
    5736                 :       68414 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5737                 :       68414 :       if (vectype == NULL_TREE)
    5738                 :             :         return NULL;
    5739                 :             : 
    5740                 :             :       /* Build a scalar type for the boolean result that when
    5741                 :             :          vectorized matches the vector type of the result in
    5742                 :             :          size and number of elements.  */
    5743                 :       60097 :       unsigned prec
    5744                 :       60097 :         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
    5745                 :             :                                TYPE_VECTOR_SUBPARTS (vectype));
    5746                 :             : 
    5747                 :       60097 :       tree type
    5748                 :      120194 :         = build_nonstandard_integer_type (prec,
    5749                 :       60097 :                                           TYPE_UNSIGNED (TREE_TYPE (var)));
    5750                 :       60097 :       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
    5751                 :             :         return NULL;
    5752                 :             : 
    5753                 :       60097 :       if (check_bool_pattern (var, vinfo, bool_stmts))
    5754                 :           0 :         var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
    5755                 :       60097 :       else if (integer_type_for_mask (var, vinfo))
    5756                 :             :         return NULL;
    5757                 :             : 
    5758                 :       21335 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5759                 :       21335 :       pattern_stmt 
    5760                 :       21335 :         = gimple_build_assign (lhs, COND_EXPR,
    5761                 :             :                                build2 (NE_EXPR, boolean_type_node,
    5762                 :       21335 :                                        var, build_int_cst (TREE_TYPE (var), 0)),
    5763                 :             :                                gimple_assign_rhs2 (last_stmt),
    5764                 :             :                                gimple_assign_rhs3 (last_stmt));
    5765                 :       21335 :       *type_out = vectype;
    5766                 :       21335 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5767                 :             : 
    5768                 :       21335 :       return pattern_stmt;
    5769                 :             :     }
    5770                 :      419286 :   else if (rhs_code == SSA_NAME
    5771                 :       16510 :            && STMT_VINFO_DATA_REF (stmt_vinfo))
    5772                 :             :     {
    5773                 :        7920 :       stmt_vec_info pattern_stmt_info;
    5774                 :        7920 :       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5775                 :        7920 :       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
    5776                 :           0 :         return NULL;
    5777                 :             : 
    5778                 :        7920 :       if (check_bool_pattern (var, vinfo, bool_stmts))
    5779                 :           0 :         rhs = adjust_bool_stmts (vinfo, bool_stmts,
    5780                 :           0 :                                  TREE_TYPE (vectype), stmt_vinfo);
    5781                 :             :       else
    5782                 :             :         {
    5783                 :        7920 :           tree type = integer_type_for_mask (var, vinfo);
    5784                 :        7920 :           tree cst0, cst1, new_vectype;
    5785                 :             : 
    5786                 :        7920 :           if (!type)
    5787                 :             :             return NULL;
    5788                 :             : 
    5789                 :        4536 :           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
    5790                 :         521 :             type = TREE_TYPE (vectype);
    5791                 :             : 
    5792                 :        4536 :           cst0 = build_int_cst (type, 0);
    5793                 :        4536 :           cst1 = build_int_cst (type, 1);
    5794                 :        4536 :           new_vectype = get_vectype_for_scalar_type (vinfo, type);
    5795                 :             : 
    5796                 :        4536 :           rhs = vect_recog_temp_ssa_var (type, NULL);
    5797                 :        4536 :           pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
    5798                 :        4536 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
    5799                 :             :         }
    5800                 :             : 
    5801                 :        4536 :       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
    5802                 :        4536 :       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
    5803                 :             :         {
    5804                 :        4015 :           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5805                 :        4015 :           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
    5806                 :        4015 :           append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
    5807                 :        4015 :           rhs = rhs2;
    5808                 :             :         }
    5809                 :        4536 :       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
    5810                 :        4536 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5811                 :        4536 :       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5812                 :        4536 :       *type_out = vectype;
    5813                 :        4536 :       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
    5814                 :             : 
    5815                 :        4536 :       return pattern_stmt;
    5816                 :             :     }
    5817                 :             :   else
    5818                 :             :     return NULL;
    5819                 :      573595 : }
    5820                 :             : 
    5821                 :             : 
    5822                 :             : /* A helper for vect_recog_mask_conversion_pattern.  Build
    5823                 :             :    conversion of MASK to a type suitable for masking VECTYPE.
    5824                 :             :    Built statement gets required vectype and is appended to
    5825                 :             :    a pattern sequence of STMT_VINFO.
    5826                 :             : 
    5827                 :             :    Return converted mask.  */
    5828                 :             : 
    5829                 :             : static tree
    5830                 :       32514 : build_mask_conversion (vec_info *vinfo,
    5831                 :             :                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
    5832                 :             : {
    5833                 :       32514 :   gimple *stmt;
    5834                 :       32514 :   tree masktype, tmp;
    5835                 :             : 
    5836                 :       32514 :   masktype = truth_type_for (vectype);
    5837                 :       32514 :   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
    5838                 :       32514 :   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
    5839                 :       32514 :   append_pattern_def_seq (vinfo, stmt_vinfo,
    5840                 :       32514 :                           stmt, masktype, TREE_TYPE (vectype));
    5841                 :             : 
    5842                 :       32514 :   return tmp;
    5843                 :             : }
    5844                 :             : 
    5845                 :             : 
    5846                 :             : /* Function vect_recog_mask_conversion_pattern
    5847                 :             : 
    5848                 :             :    Try to find statements which require boolean type
    5849                 :             :    conversion.  Additional conversion statements are
    5850                 :             :    added to handle such cases.  For example:
    5851                 :             : 
    5852                 :             :    bool m_1, m_2, m_3;
    5853                 :             :    int i_4, i_5;
    5854                 :             :    double d_6, d_7;
    5855                 :             :    char c_1, c_2, c_3;
    5856                 :             : 
    5857                 :             :    S1   m_1 = i_4 > i_5;
    5858                 :             :    S2   m_2 = d_6 < d_7;
    5859                 :             :    S3   m_3 = m_1 & m_2;
    5860                 :             :    S4   c_1 = m_3 ? c_2 : c_3;
    5861                 :             : 
    5862                 :             :    Will be transformed into:
    5863                 :             : 
    5864                 :             :    S1   m_1 = i_4 > i_5;
    5865                 :             :    S2   m_2 = d_6 < d_7;
    5866                 :             :    S3'' m_2' = (_Bool[bitsize=32])m_2
    5867                 :             :    S3'  m_3' = m_1 & m_2';
    5868                 :             :    S4'' m_3'' = (_Bool[bitsize=8])m_3'
    5869                 :             :    S4'  c_1' = m_3'' ? c_2 : c_3;  */
    5870                 :             : 
    5871                 :             : static gimple *
    5872                 :    25065696 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
    5873                 :             :                                     stmt_vec_info stmt_vinfo, tree *type_out)
    5874                 :             : {
    5875                 :    25065696 :   gimple *last_stmt = stmt_vinfo->stmt;
    5876                 :    25065696 :   enum tree_code rhs_code;
    5877                 :    25065696 :   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
    5878                 :    25065696 :   tree vectype1, vectype2;
    5879                 :    25065696 :   stmt_vec_info pattern_stmt_info;
    5880                 :    25065696 :   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
    5881                 :    25065696 :   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
    5882                 :             : 
    5883                 :             :   /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
    5884                 :             :      conversion.  LOAD_P must come from internal_load_fn_p (not internal_store_fn_p) so that loads are rejected without a mask type and marked nothrow just like stores.  */
    5885                 :    25065696 :   if (is_gimple_call (last_stmt)
    5886                 :    25065696 :       && gimple_call_internal_p (last_stmt))
    5887                 :             :     {
    5888                 :       82444 :       gcall *pattern_stmt;
    5889                 :             : 
    5890                 :       82444 :       internal_fn ifn = gimple_call_internal_fn (last_stmt);
    5891                 :       82444 :       int mask_argno = internal_fn_mask_index (ifn);
    5892                 :       82444 :       if (mask_argno < 0)
    5893                 :             :         return NULL;
    5894                 :             : 
    5895                 :        9425 :       bool store_p = internal_store_fn_p (ifn);
    5896                 :        9425 :       bool load_p = internal_load_fn_p (ifn);
    5897                 :        9425 :       if (store_p)
    5898                 :             :         {
    5899                 :        2484 :           int rhs_index = internal_fn_stored_value_index (ifn);
    5900                 :        2484 :           tree rhs = gimple_call_arg (last_stmt, rhs_index);
    5901                 :        2484 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
    5902                 :             :         }
    5903                 :             :       else
    5904                 :             :         {
    5905                 :        6941 :           lhs = gimple_call_lhs (last_stmt);
    5906                 :        6941 :           if (!lhs)
    5907                 :             :             return NULL;
    5908                 :        6941 :           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5909                 :             :         }
    5910                 :             : 
    5911                 :        9425 :       if (!vectype1)
    5912                 :             :         return NULL;
    5913                 :             : 
    5914                 :        9314 :       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
    5915                 :        9314 :       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
    5916                 :        9314 :       if (mask_arg_type)
    5917                 :             :         {
    5918                 :        8297 :           vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
    5919                 :             : 
    5920                 :        8297 :           if (!vectype2
    5921                 :        8297 :               || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    5922                 :             :                            TYPE_VECTOR_SUBPARTS (vectype2)))
    5923                 :        5079 :             return NULL;
    5924                 :             :         }
    5925                 :        1017 :       else if (store_p || load_p)
    5926                 :             :         return NULL;
    5927                 :             : 
    5928                 :        4020 :       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
    5929                 :             : 
    5930                 :        4020 :       auto_vec<tree, 8> args;
    5931                 :        4020 :       unsigned int nargs = gimple_call_num_args (last_stmt);
    5932                 :        4020 :       args.safe_grow (nargs, true);
    5933                 :       18733 :       for (unsigned int i = 0; i < nargs; ++i)
    5934                 :       14713 :         args[i] = ((int) i == mask_argno
    5935                 :       14713 :                    ? tmp
    5936                 :       10693 :                    : gimple_call_arg (last_stmt, i));
    5937                 :        4020 :       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
    5938                 :             : 
    5939                 :        4020 :       if (!store_p)
    5940                 :             :         {
    5941                 :        3567 :           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    5942                 :        3567 :           gimple_call_set_lhs (pattern_stmt, lhs);
    5943                 :             :         }
    5944                 :             : 
    5945                 :        3567 :       if (load_p || store_p)
    5946                 :         453 :         gimple_call_set_nothrow (pattern_stmt, true);
    5947                 :             : 
    5948                 :        4020 :       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
    5949                 :        4020 :       if (STMT_VINFO_DATA_REF (stmt_vinfo))
    5950                 :        1820 :         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
    5951                 :             : 
    5952                 :        4020 :       *type_out = vectype1;
    5953                 :        4020 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    5954                 :             : 
    5955                 :        4020 :       return pattern_stmt;
    5956                 :        4020 :     }
    5957                 :             : 
    5958                 :    24983252 :   if (!is_gimple_assign (last_stmt))
    5959                 :             :     return NULL;
    5960                 :             : 
    5961                 :    17450640 :   gimple *pattern_stmt;
    5962                 :    17450640 :   lhs = gimple_assign_lhs (last_stmt);
    5963                 :    17450640 :   rhs1 = gimple_assign_rhs1 (last_stmt);
    5964                 :    17450640 :   rhs_code = gimple_assign_rhs_code (last_stmt);
    5965                 :             : 
    5966                 :             :   /* Check for cond expression requiring mask conversion.  */
    5967                 :    17450640 :   if (rhs_code == COND_EXPR)
    5968                 :             :     {
    5969                 :      104740 :       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
    5970                 :             : 
    5971                 :      104740 :       if (TREE_CODE (rhs1) == SSA_NAME)
    5972                 :             :         {
    5973                 :       62991 :           rhs1_type = integer_type_for_mask (rhs1, vinfo);
    5974                 :       62991 :           if (!rhs1_type)
    5975                 :             :             return NULL;
    5976                 :             :         }
    5977                 :       41749 :       else if (COMPARISON_CLASS_P (rhs1))
    5978                 :             :         {
    5979                 :             :           /* Check whether we're comparing scalar booleans and (if so)
    5980                 :             :              whether a better mask type exists than the mask associated
    5981                 :             :              with boolean-sized elements.  This avoids unnecessary packs
    5982                 :             :              and unpacks if the booleans are set from comparisons of
    5983                 :             :              wider types.  E.g. in:
    5984                 :             : 
    5985                 :             :                int x1, x2, x3, x4, y1, y2;
    5986                 :             :                ...
    5987                 :             :                bool b1 = (x1 == x2);
    5988                 :             :                bool b2 = (x3 == x4);
    5989                 :             :                ... = b1 == b2 ? y1 : y2;
    5990                 :             : 
    5991                 :             :              it is better for b1 and b2 to use the mask type associated
    5992                 :             :              with int elements rather than bool (byte) elements.  */
    5993                 :       41749 :           rhs1_op0 = TREE_OPERAND (rhs1, 0);
    5994                 :       41749 :           rhs1_op1 = TREE_OPERAND (rhs1, 1);
    5995                 :       41749 :           if (!rhs1_op0 || !rhs1_op1)
    5996                 :             :             return NULL;
    5997                 :       41749 :           rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
    5998                 :       41749 :           rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
    5999                 :             : 
    6000                 :       41749 :           if (!rhs1_op0_type)
    6001                 :       41749 :             rhs1_type = TREE_TYPE (rhs1_op0);
    6002                 :           0 :           else if (!rhs1_op1_type)
    6003                 :           0 :             rhs1_type = TREE_TYPE (rhs1_op1);
    6004                 :           0 :           else if (TYPE_PRECISION (rhs1_op0_type)
    6005                 :           0 :                    != TYPE_PRECISION (rhs1_op1_type))
    6006                 :             :             {
    6007                 :           0 :               int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
    6008                 :           0 :                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
    6009                 :           0 :               int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
    6010                 :           0 :                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
    6011                 :           0 :               if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
    6012                 :             :                 {
    6013                 :           0 :                   if (abs (tmp0) > abs (tmp1))
    6014                 :             :                     rhs1_type = rhs1_op1_type;
    6015                 :             :                   else
    6016                 :           0 :                     rhs1_type = rhs1_op0_type;
    6017                 :             :                 }
    6018                 :             :               else
    6019                 :           0 :                 rhs1_type = build_nonstandard_integer_type
    6020                 :           0 :                   (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
    6021                 :             :             }
    6022                 :             :           else
    6023                 :             :             rhs1_type = rhs1_op0_type;
    6024                 :             :         }
    6025                 :             :       else
    6026                 :             :         return NULL;
    6027                 :             : 
    6028                 :       97923 :       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6029                 :             : 
    6030                 :       97923 :       if (!vectype1 || !vectype2)
    6031                 :             :         return NULL;
    6032                 :             : 
    6033                 :             :       /* Continue if a conversion is needed.  Also continue if we have
    6034                 :             :          a comparison whose vector type would normally be different from
    6035                 :             :          VECTYPE2 when considered in isolation.  In that case we'll
    6036                 :             :          replace the comparison with an SSA name (so that we can record
    6037                 :             :          its vector type) and behave as though the comparison was an SSA
    6038                 :             :          name from the outset.  */
    6039                 :       96403 :       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
    6040                 :             :                     TYPE_VECTOR_SUBPARTS (vectype2))
    6041                 :       80303 :           && !rhs1_op0_type
    6042                 :       96403 :           && !rhs1_op1_type)
    6043                 :       80303 :         return NULL;
    6044                 :             : 
    6045                 :             :       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
    6046                 :             :          in place, we can handle it in vectorizable_condition.  This avoids
    6047                 :             :          unnecessary promotion stmts and increased vectorization factor.  */
    6048                 :       16100 :       if (COMPARISON_CLASS_P (rhs1)
    6049                 :          20 :           && INTEGRAL_TYPE_P (rhs1_type)
    6050                 :       16120 :           && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
    6051                 :             :                        TYPE_VECTOR_SUBPARTS (vectype2)))
    6052                 :             :         {
    6053                 :          20 :           enum vect_def_type dt;
    6054                 :          20 :           if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
    6055                 :          20 :               && dt == vect_external_def
    6056                 :           0 :               && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
    6057                 :          20 :               && (dt == vect_external_def
    6058                 :           0 :                   || dt == vect_constant_def))
    6059                 :             :             {
    6060                 :           0 :               tree wide_scalar_type = build_nonstandard_integer_type
    6061                 :           0 :                 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
    6062                 :           0 :               tree vectype3 = get_vectype_for_scalar_type (vinfo,
    6063                 :             :                                                            wide_scalar_type);
    6064                 :           0 :               if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
    6065                 :           0 :                 return NULL;
    6066                 :             :             }
    6067                 :             :         }
    6068                 :             : 
    6069                 :             :       /* If rhs1 is a comparison we need to move it into a
    6070                 :             :          separate statement.  */
    6071                 :       16100 :       if (TREE_CODE (rhs1) != SSA_NAME)
    6072                 :             :         {
    6073                 :          20 :           tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
    6074                 :          20 :           if (rhs1_op0_type
    6075                 :          20 :               && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
    6076                 :           0 :             rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
    6077                 :             :                                               vectype2, stmt_vinfo);
    6078                 :          20 :           if (rhs1_op1_type
    6079                 :          20 :               && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
    6080                 :           0 :             rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
    6081                 :             :                                               vectype2, stmt_vinfo);
    6082                 :          20 :           pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
    6083                 :             :                                               rhs1_op0, rhs1_op1);
    6084                 :          20 :           rhs1 = tmp;
    6085                 :          20 :           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
    6086                 :             :                                   rhs1_type);
    6087                 :             :         }
    6088                 :             : 
    6089                 :       16100 :       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
    6090                 :       32200 :                     TYPE_VECTOR_SUBPARTS (vectype2)))
    6091                 :       16100 :         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6092                 :             :       else
    6093                 :             :         tmp = rhs1;
    6094                 :             : 
    6095                 :       16100 :       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6096                 :       16100 :       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
    6097                 :             :                                           gimple_assign_rhs2 (last_stmt),
    6098                 :             :                                           gimple_assign_rhs3 (last_stmt));
    6099                 :             : 
    6100                 :       16100 :       *type_out = vectype1;
    6101                 :       16100 :       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6102                 :             : 
    6103                 :       16100 :       return pattern_stmt;
    6104                 :             :     }
    6105                 :             : 
    6106                 :             :   /* Now check for binary boolean operations requiring conversion for
    6107                 :             :      one of operands.  */
    6108                 :    17345900 :   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6109                 :             :     return NULL;
    6110                 :             : 
    6111                 :     1287529 :   if (rhs_code != BIT_IOR_EXPR
    6112                 :             :       && rhs_code != BIT_XOR_EXPR
    6113                 :     1287529 :       && rhs_code != BIT_AND_EXPR
    6114                 :      979850 :       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    6115                 :             :     return NULL;
    6116                 :             : 
    6117                 :     1187129 :   rhs2 = gimple_assign_rhs2 (last_stmt);
    6118                 :             : 
    6119                 :     1187129 :   rhs1_type = integer_type_for_mask (rhs1, vinfo);
    6120                 :     1187129 :   rhs2_type = integer_type_for_mask (rhs2, vinfo);
    6121                 :             : 
    6122                 :     1187129 :   if (!rhs1_type || !rhs2_type
    6123                 :     1187129 :       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    6124                 :             :     return NULL;
    6125                 :             : 
    6126                 :       12394 :   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    6127                 :             :     {
    6128                 :        8120 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
    6129                 :        8120 :       if (!vectype1)
    6130                 :             :         return NULL;
    6131                 :        8120 :       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    6132                 :             :     }
    6133                 :             :   else
    6134                 :             :     {
    6135                 :        4274 :       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
    6136                 :        4274 :       if (!vectype1)
    6137                 :             :         return NULL;
    6138                 :        4274 :       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    6139                 :             :     }
    6140                 :             : 
    6141                 :       12394 :   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
    6142                 :       12394 :   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
    6143                 :             : 
    6144                 :       12394 :   *type_out = vectype1;
    6145                 :       12394 :   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
    6146                 :             : 
    6147                 :       12394 :   return pattern_stmt;
    6148                 :             : }
    6149                 :             : 
    6150                 :             : /* STMT_INFO is a load or store.  If the load or store is conditional, return
    6151                 :             :    the boolean condition under which it occurs, otherwise return null.  A plain assignment (asserted to be a single-rhs assign) yields NULL_TREE; a call yields the argument at its internal_fn_mask_index; any other statement kind is unreachable.  */
    6152                 :             : 
    6153                 :             : static tree
    6154                 :       28959 : vect_get_load_store_mask (stmt_vec_info stmt_info)
    6155                 :             : {
    6156                 :       28959 :   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    6157                 :             :     {
    6158                 :       27529 :       gcc_assert (gimple_assign_single_p (def_assign));
    6159                 :             :       return NULL_TREE;
    6160                 :             :     }
    6161                 :             : 
    6162                 :        1430 :   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    6163                 :             :     {
    6164                 :        1430 :       internal_fn ifn = gimple_call_internal_fn (def_call);
    6165                 :        1430 :       int mask_index = internal_fn_mask_index (ifn);  /* NOTE(review): not checked for < 0 here; presumably callers only pass masked internal calls -- confirm.  */
    6166                 :        1430 :       return gimple_call_arg (def_call, mask_index);
    6167                 :             :     }
    6168                 :             : 
    6169                 :           0 :   gcc_unreachable ();
    6170                 :             : }
    6171                 :             : 
    6172                 :             : /* Return MASK if MASK is suitable for masking an operation on vectors
    6173                 :             :    of type VECTYPE, otherwise convert it into such a form and return
    6174                 :             :    the result.  Associate any conversion statements with STMT_INFO's
    6175                 :             :    pattern.  A conversion is built only when integer_type_for_mask finds a type for MASK and the corresponding mask vector type may have a different number of subparts from VECTYPE; in every other case MASK is returned unchanged.  */
    6176                 :             : 
    6177                 :             : static tree
    6178                 :           0 : vect_convert_mask_for_vectype (tree mask, tree vectype,
    6179                 :             :                                stmt_vec_info stmt_info, vec_info *vinfo)
    6180                 :             : {
    6181                 :           0 :   tree mask_type = integer_type_for_mask (mask, vinfo);
    6182                 :           0 :   if (mask_type)
    6183                 :             :     {
    6184                 :           0 :       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
    6185                 :           0 :       if (mask_vectype
    6186                 :           0 :           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
    6187                 :           0 :                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
    6188                 :           0 :         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    6189                 :             :     }
    6190                 :           0 :   return mask;
    6191                 :             : }
    6192                 :             : 
    6193                 :             : /* Return the equivalent of:
    6194                 :             : 
    6195                 :             :      fold_convert (TYPE, VALUE)
    6196                 :             : 
    6197                 :             :    with the expectation that the operation will be vectorized.
    6198                 :             :    If new statements are needed, add them as pattern statements
    6199                 :             :    to STMT_INFO.  VALUE itself is returned when useless_type_conversion_p (TYPE, TREE_TYPE (VALUE)) holds; otherwise the result is a new temporary defined by a CONVERT_EXPR whose pattern statement is given the vector type for TYPE.  */
    6200                 :             : 
    6201                 :             : static tree
    6202                 :           0 : vect_add_conversion_to_pattern (vec_info *vinfo,
    6203                 :             :                                 tree type, tree value, stmt_vec_info stmt_info)
    6204                 :             : {
    6205                 :           0 :   if (useless_type_conversion_p (type, TREE_TYPE (value)))
    6206                 :             :     return value;
    6207                 :             : 
    6208                 :           0 :   tree new_value = vect_recog_temp_ssa_var (type, NULL);
    6209                 :           0 :   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
    6210                 :           0 :   append_pattern_def_seq (vinfo, stmt_info, conversion,
    6211                 :             :                           get_vectype_for_scalar_type (vinfo, type));
    6212                 :           0 :   return new_value;
    6213                 :             : }
    6214                 :             : 
    6215                 :             : /* Try to convert STMT_INFO into a call to a gather load or scatter store
    6216                 :             :    internal function.  Return the final statement on success and set
    6217                 :             :    *TYPE_OUT to the vector type being loaded or stored.
    6218                 :             : 
    6219                 :             :    This function only handles gathers and scatters that were recognized
    6220                 :             :    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
    6221                 :             : 
    6222                 :             : static gimple *
    6223                 :    25065696 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
    6224                 :             :                                    stmt_vec_info stmt_info, tree *type_out)
    6225                 :             : {
    6226                 :             :   /* Currently we only support this for loop vectorization.  */
    6227                 :    27958991 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6228                 :     2893295 :   if (!loop_vinfo)
    6229                 :             :     return NULL;
    6230                 :             : 
    6231                 :             :   /* Make sure that we're looking at a gather load or scatter store.  */
    6232                 :     2893295 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    6233                 :     2893295 :   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    6234                 :             :     return NULL;
    6235                 :             : 
    6236                 :             :   /* Get the boolean that controls whether the load or store happens.
    6237                 :             :      This is null if the operation is unconditional.  */
    6238                 :       28959 :   tree mask = vect_get_load_store_mask (stmt_info);
    6239                 :             : 
    6240                 :             :   /* Make sure that the target supports an appropriate internal
    6241                 :             :      function for the gather/scatter operation.  */
    6242                 :       28959 :   gather_scatter_info gs_info;
    6243                 :       28959 :   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
    6244                 :       28959 :       || gs_info.ifn == IFN_LAST)
    6245                 :             :     return NULL;
    6246                 :             : 
    6247                 :             :   /* Convert the mask to the right form.  */
    6248                 :           0 :   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
    6249                 :             :                                                  gs_info.element_type);
    6250                 :           0 :   if (mask)
    6251                 :           0 :     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
    6252                 :             :                                           loop_vinfo);
    6253                 :           0 :   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
    6254                 :           0 :            || gs_info.ifn == IFN_MASK_GATHER_LOAD
    6255                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
    6256                 :           0 :            || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    6257                 :           0 :     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
    6258                 :             : 
    6259                 :             :   /* Get the invariant base and non-invariant offset, converting the
    6260                 :             :      latter to the same width as the vector elements.  */
    6261                 :           0 :   tree base = gs_info.base;
    6262                 :           0 :   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
    6263                 :           0 :   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
    6264                 :             :                                                 gs_info.offset, stmt_info);
    6265                 :             : 
    6266                 :             :   /* Build the new pattern statement.  */
    6267                 :           0 :   tree scale = size_int (gs_info.scale);
    6268                 :           0 :   gcall *pattern_stmt;
    6269                 :           0 :   if (DR_IS_READ (dr))
    6270                 :             :     {
    6271                 :           0 :       tree zero = build_zero_cst (gs_info.element_type);
    6272                 :           0 :       if (mask != NULL)
    6273                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
    6274                 :             :                                                    offset, scale, zero, mask);
    6275                 :             :       else
    6276                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
    6277                 :             :                                                    offset, scale, zero);
    6278                 :           0 :       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
    6279                 :           0 :       gimple_call_set_lhs (pattern_stmt, load_lhs);
    6280                 :             :     }
    6281                 :             :   else
    6282                 :             :     {
    6283                 :           0 :       tree rhs = vect_get_store_rhs (stmt_info);
    6284                 :           0 :       if (mask != NULL)
    6285                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
    6286                 :             :                                                    base, offset, scale, rhs,
    6287                 :             :                                                    mask);
    6288                 :             :       else
    6289                 :           0 :         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
    6290                 :             :                                                    base, offset, scale, rhs);
    6291                 :             :     }
    6292                 :           0 :   gimple_call_set_nothrow (pattern_stmt, true);
    6293                 :             : 
    6294                 :             :   /* Copy across relevant vectorization info and associate DR with the
    6295                 :             :      new pattern statement instead of the original statement.  */
    6296                 :           0 :   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
    6297                 :           0 :   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
    6298                 :             : 
    6299                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    6300                 :           0 :   *type_out = vectype;
    6301                 :           0 :   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
    6302                 :             : 
    6303                 :           0 :   return pattern_stmt;
    6304                 :             : }
    6305                 :             : 
    6306                 :             : /* Return true if TYPE is a non-boolean integer type.  These are the types
    6307                 :             :    that we want to consider for narrowing.  */
    6308                 :             : 
    6309                 :             : static bool
    6310                 :    50243102 : vect_narrowable_type_p (tree type)
    6311                 :             : {
    6312                 :    50243102 :   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
    6313                 :             : }
    6314                 :             : 
    6315                 :             : /* Return true if the operation given by CODE can be truncated to N bits
    6316                 :             :    when only N bits of the output are needed.  This is only true if bit N+1
    6317                 :             :    of the inputs has no effect on the low N bits of the result.  */
    6318                 :             : 
    6319                 :             : static bool
    6320                 :    12005717 : vect_truncatable_operation_p (tree_code code)
    6321                 :             : {
    6322                 :    12005717 :   switch (code)
    6323                 :             :     {
    6324                 :             :     case NEGATE_EXPR:
    6325                 :             :     case PLUS_EXPR:
    6326                 :             :     case MINUS_EXPR:
    6327                 :             :     case MULT_EXPR:
    6328                 :             :     case BIT_NOT_EXPR:
    6329                 :             :     case BIT_AND_EXPR:
    6330                 :             :     case BIT_IOR_EXPR:
    6331                 :             :     case BIT_XOR_EXPR:
    6332                 :             :     case COND_EXPR:
    6333                 :             :       return true;
    6334                 :             : 
    6335                 :     4812918 :     default:
    6336                 :     4812918 :       return false;
    6337                 :             :     }
    6338                 :             : }
    6339                 :             : 
    6340                 :             : /* Record that STMT_INFO could be changed from operating on TYPE to
    6341                 :             :    operating on a type with the precision and sign given by PRECISION
    6342                 :             :    and SIGN respectively.  PRECISION is an arbitrary bit precision;
    6343                 :             :    it might not be a whole number of bytes.  */
    6344                 :             : 
    6345                 :             : static void
    6346                 :     1575525 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
    6347                 :             :                          unsigned int precision, signop sign)
    6348                 :             : {
    6349                 :             :   /* Round the precision up to a whole number of bytes.  */
    6350                 :     1575525 :   precision = vect_element_precision (precision);
    6351                 :     1575525 :   if (precision < TYPE_PRECISION (type)
    6352                 :     1575525 :       && (!stmt_info->operation_precision
    6353                 :       32827 :           || stmt_info->operation_precision > precision))
    6354                 :             :     {
    6355                 :      957151 :       stmt_info->operation_precision = precision;
    6356                 :      957151 :       stmt_info->operation_sign = sign;
    6357                 :             :     }
    6358                 :     1575525 : }
    6359                 :             : 
    6360                 :             : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
    6361                 :             :    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
    6362                 :             :    is an arbitrary bit precision; it might not be a whole number of bytes.  */
    6363                 :             : 
    6364                 :             : static void
    6365                 :     8541799 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
    6366                 :             :                               unsigned int min_input_precision)
    6367                 :             : {
    6368                 :             :   /* This operation in isolation only requires the inputs to have
    6369                 :             :      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
    6370                 :             :      that MIN_INPUT_PRECISION is a natural precision for the chain
    6371                 :             :      as a whole.  E.g. consider something like:
    6372                 :             : 
    6373                 :             :          unsigned short *x, *y;
    6374                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6375                 :             : 
    6376                 :             :      The right shift can be done on unsigned chars, and only requires the
    6377                 :             :      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
    6378                 :             :      approach would mean turning a natural chain of single-vector unsigned
    6379                 :             :      short operations into one that truncates "*x" and then extends
    6380                 :             :      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
    6381                 :             :      operation and one vector for each unsigned char operation.
    6382                 :             :      This would be a significant pessimization.
    6383                 :             : 
    6384                 :             :      Instead only propagate the maximum of this precision and the precision
    6385                 :             :      required by the users of the result.  This means that we don't pessimize
    6386                 :             :      the case above but continue to optimize things like:
    6387                 :             : 
    6388                 :             :          unsigned char *y;
    6389                 :             :          unsigned short *x;
    6390                 :             :          *y = ((*x & 0xf0) >> 4) | (*y << 4);
    6391                 :             : 
    6392                 :             :      Here we would truncate two vectors of *x to a single vector of
    6393                 :             :      unsigned chars and use single-vector unsigned char operations for
    6394                 :             :      everything else, rather than doing two unsigned short copies of
    6395                 :             :      "(*x & 0xf0) >> 4" and then truncating the result.  */
    6396                 :     8541799 :   min_input_precision = MAX (min_input_precision,
    6397                 :             :                              stmt_info->min_output_precision);
    6398                 :             : 
    6399                 :     8541799 :   if (min_input_precision < TYPE_PRECISION (type)
    6400                 :     8541799 :       && (!stmt_info->min_input_precision
    6401                 :       55309 :           || stmt_info->min_input_precision > min_input_precision))
    6402                 :      497674 :     stmt_info->min_input_precision = min_input_precision;
    6403                 :     8541799 : }
    6404                 :             : 
    6405                 :             : /* Subroutine of vect_determine_min_output_precision.  Return true if
    6406                 :             :    we can calculate a reduced number of output bits for STMT_INFO,
    6407                 :             :    whose result is LHS.  */
    6408                 :             : 
    6409                 :             : static bool
    6410                 :    11598215 : vect_determine_min_output_precision_1 (vec_info *vinfo,
    6411                 :             :                                        stmt_vec_info stmt_info, tree lhs)
    6412                 :             : {
    6413                 :             :   /* Take the maximum precision required by users of the result.  */
    6414                 :    11598215 :   unsigned int precision = 0;
    6415                 :    11598215 :   imm_use_iterator iter;
    6416                 :    11598215 :   use_operand_p use;
    6417                 :    12198214 :   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    6418                 :             :     {
    6419                 :    11947491 :       gimple *use_stmt = USE_STMT (use);
    6420                 :    11947491 :       if (is_gimple_debug (use_stmt))
    6421                 :      343975 :         continue;
    6422                 :    11603516 :       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
    6423                 :    11603516 :       if (!use_stmt_info || !use_stmt_info->min_input_precision)
    6424                 :             :         return false;
    6425                 :             :       /* The input precision recorded for COND_EXPRs applies only to the
    6426                 :             :          "then" and "else" values.  */
    6427                 :      256436 :       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
    6428                 :      225010 :       if (assign
    6429                 :      225010 :           && gimple_assign_rhs_code (assign) == COND_EXPR
    6430                 :         412 :           && use->use != gimple_assign_rhs2_ptr (assign)
    6431                 :         412 :           && use->use != gimple_assign_rhs3_ptr (assign))
    6432                 :             :         return false;
    6433                 :      256024 :       precision = MAX (precision, use_stmt_info->min_input_precision);
    6434                 :             :     }
    6435                 :             : 
    6436                 :      250723 :   if (dump_enabled_p ())
    6437                 :        6901 :     dump_printf_loc (MSG_NOTE, vect_location,
    6438                 :             :                      "only the low %d bits of %T are significant\n",
    6439                 :             :                      precision, lhs);
    6440                 :      250723 :   stmt_info->min_output_precision = precision;
    6441                 :      250723 :   return true;
    6442                 :             : }
    6443                 :             : 
    6444                 :             : /* Calculate min_output_precision for STMT_INFO.  */
    6445                 :             : 
    6446                 :             : static void
    6447                 :    30529773 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6448                 :             : {
    6449                 :             :   /* We're only interested in statements with a narrowable result.  */
    6450                 :    30529773 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6451                 :    30529773 :   if (!lhs
    6452                 :    24070591 :       || TREE_CODE (lhs) != SSA_NAME
    6453                 :    50455005 :       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    6454                 :             :     return;
    6455                 :             : 
    6456                 :    11598215 :   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    6457                 :    11347492 :     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
    6458                 :             : }
    6459                 :             : 
    6460                 :             : /* Use range information to decide whether STMT (described by STMT_INFO)
    6461                 :             :    could be done in a narrower type.  This is effectively a forward
    6462                 :             :    propagation, since it uses context-independent information that applies
    6463                 :             :    to all users of an SSA name.  */
    6464                 :             : 
    6465                 :             : static void
    6466                 :    17098120 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
    6467                 :             : {
    6468                 :    17098120 :   tree lhs = gimple_assign_lhs (stmt);
    6469                 :    17098120 :   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    6470                 :    15622663 :     return;
    6471                 :             : 
    6472                 :    13219750 :   tree type = TREE_TYPE (lhs);
    6473                 :    13219750 :   if (!vect_narrowable_type_p (type))
    6474                 :             :     return;
    6475                 :             : 
    6476                 :             :   /* First see whether we have any useful range information for the result.  */
    6477                 :     8866647 :   unsigned int precision = TYPE_PRECISION (type);
    6478                 :     8866647 :   signop sign = TYPE_SIGN (type);
    6479                 :     8866647 :   wide_int min_value, max_value;
    6480                 :     8866647 :   if (!vect_get_range_info (lhs, &min_value, &max_value))
    6481                 :             :     return;
    6482                 :             : 
    6483                 :     3861879 :   tree_code code = gimple_assign_rhs_code (stmt);
    6484                 :     3861879 :   unsigned int nops = gimple_num_ops (stmt);
    6485                 :             : 
    6486                 :     3861879 :   if (!vect_truncatable_operation_p (code))
    6487                 :             :     {
    6488                 :             :       /* Handle operations that can be computed in type T if all inputs
    6489                 :             :          and outputs can be represented in type T.  Also handle left and
    6490                 :             :          right shifts, where (in addition) the maximum shift amount must
    6491                 :             :          be less than the number of bits in T.  */
    6492                 :     1468387 :       bool is_shift;
    6493                 :     1468387 :       switch (code)
    6494                 :             :         {
    6495                 :             :         case LSHIFT_EXPR:
    6496                 :             :         case RSHIFT_EXPR:
    6497                 :             :           is_shift = true;
    6498                 :             :           break;
    6499                 :             : 
    6500                 :      166900 :         case ABS_EXPR:
    6501                 :      166900 :         case MIN_EXPR:
    6502                 :      166900 :         case MAX_EXPR:
    6503                 :      166900 :         case TRUNC_DIV_EXPR:
    6504                 :      166900 :         case CEIL_DIV_EXPR:
    6505                 :      166900 :         case FLOOR_DIV_EXPR:
    6506                 :      166900 :         case ROUND_DIV_EXPR:
    6507                 :      166900 :         case EXACT_DIV_EXPR:
    6508                 :             :           /* Modulus is excluded because it is typically calculated by doing
    6509                 :             :              a division, for which minimum signed / -1 isn't representable in
    6510                 :             :              the original signed type.  We could take the division range into
    6511                 :             :              account instead, if handling modulus ever becomes important.  */
    6512                 :      166900 :           is_shift = false;
    6513                 :      166900 :           break;
    6514                 :             : 
    6515                 :             :         default:
    6516                 :             :           return;
    6517                 :             :         }
    6518                 :      941897 :       for (unsigned int i = 1; i < nops; ++i)
    6519                 :             :         {
    6520                 :      734301 :           tree op = gimple_op (stmt, i);
    6521                 :      734301 :           wide_int op_min_value, op_max_value;
    6522                 :      734301 :           if (TREE_CODE (op) == INTEGER_CST)
    6523                 :             :             {
    6524                 :      209717 :               unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
    6525                 :      209717 :               op_min_value = op_max_value = wi::to_wide (op, op_precision);
    6526                 :             :             }
    6527                 :      524584 :           else if (TREE_CODE (op) == SSA_NAME)
    6528                 :             :             {
    6529                 :      524584 :               if (!vect_get_range_info (op, &op_min_value, &op_max_value))
    6530                 :             :                 return;
    6531                 :             :             }
    6532                 :             :           else
    6533                 :             :             return;
    6534                 :             : 
    6535                 :      466764 :           if (is_shift && i == 2)
    6536                 :             :             {
    6537                 :             :               /* There needs to be one more bit than the maximum shift amount.
    6538                 :             : 
    6539                 :             :                  If the maximum shift amount is already 1 less than PRECISION
    6540                 :             :                  then we can't narrow the shift further.  Dealing with that
    6541                 :             :                  case first ensures that we can safely use an unsigned range
    6542                 :             :                  below.
    6543                 :             : 
    6544                 :             :                  op_min_value isn't relevant, since shifts by negative amounts
    6545                 :             :                  are UB.  */
    6546                 :      153640 :               if (wi::geu_p (op_max_value, precision - 1))
    6547                 :             :                 return;
    6548                 :      140497 :               unsigned int min_bits = op_max_value.to_uhwi () + 1;
    6549                 :             : 
    6550                 :             :               /* As explained below, we can convert a signed shift into an
    6551                 :             :                  unsigned shift if the sign bit is always clear.  At this
    6552                 :             :                  point we've already processed the ranges of the output and
    6553                 :             :                  the first input.  */
    6554                 :      140497 :               auto op_sign = sign;
    6555                 :      140497 :               if (sign == SIGNED && !wi::neg_p (min_value))
    6556                 :             :                 op_sign = UNSIGNED;
    6557                 :      280994 :               op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
    6558                 :      140497 :                                              precision, op_sign);
    6559                 :      280994 :               op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
    6560                 :      140497 :                                              precision, op_sign);
    6561                 :             :             }
    6562                 :      453621 :           min_value = wi::min (min_value, op_min_value, sign);
    6563                 :      453621 :           max_value = wi::max (max_value, op_max_value, sign);
    6564                 :      734301 :         }
    6565                 :             :     }
    6566                 :             : 
    6567                 :             :   /* Try to switch signed types for unsigned types if we can.
    6568                 :             :      This is better for two reasons.  First, unsigned ops tend
    6569                 :             :      to be cheaper than signed ops.  Second, it means that we can
    6570                 :             :      handle things like:
    6571                 :             : 
    6572                 :             :         signed char c;
    6573                 :             :         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
    6574                 :             : 
    6575                 :             :      as:
    6576                 :             : 
    6577                 :             :         signed char c;
    6578                 :             :         unsigned short res_1 = (unsigned short) c & 0xff00;
    6579                 :             :         int res = (int) res_1;
    6580                 :             : 
    6581                 :             :      where the intermediate result res_1 has unsigned rather than
    6582                 :             :      signed type.  */
    6583                 :     2601088 :   if (sign == SIGNED && !wi::neg_p (min_value))
    6584                 :             :     sign = UNSIGNED;
    6585                 :             : 
    6586                 :             :   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
    6587                 :     2601088 :   unsigned int precision1 = wi::min_precision (min_value, sign);
    6588                 :     2601088 :   unsigned int precision2 = wi::min_precision (max_value, sign);
    6589                 :     2601088 :   unsigned int value_precision = MAX (precision1, precision2);
    6590                 :     2601088 :   if (value_precision >= precision)
    6591                 :             :     return;
    6592                 :             : 
    6593                 :     1475457 :   if (dump_enabled_p ())
    6594                 :       75049 :     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6595                 :             :                      " without loss of precision: %G",
    6596                 :             :                      sign == SIGNED ? "signed" : "unsigned",
    6597                 :             :                      value_precision, (gimple *) stmt);
    6598                 :             : 
    6599                 :     1475457 :   vect_set_operation_type (stmt_info, type, value_precision, sign);
    6600                 :     1475457 :   vect_set_min_input_precision (stmt_info, type, value_precision);
    6601                 :     8866647 : }
    6602                 :             : 
    6603                 :             : /* Use information about the users of STMT's result to decide whether
    6604                 :             :    STMT (described by STMT_INFO) could be done in a narrower type.
    6605                 :             :    This is effectively a backward propagation.  */
    6606                 :             : 
    6607                 :             : static void
    6608                 :    17098120 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
    6609                 :             : {
    6610                 :    17098120 :   tree_code code = gimple_assign_rhs_code (stmt);
    6611                 :    17098120 :   unsigned int opno = (code == COND_EXPR ? 2 : 1);
    6612                 :    17098120 :   tree type = TREE_TYPE (gimple_op (stmt, opno));
    6613                 :    17098120 :   if (!vect_narrowable_type_p (type))
    6614                 :    10031778 :     return;
    6615                 :             : 
    6616                 :    10498305 :   unsigned int precision = TYPE_PRECISION (type);
    6617                 :    10498305 :   unsigned int operation_precision, min_input_precision;
    6618                 :    10498305 :   switch (code)
    6619                 :             :     {
    6620                 :     1943396 :     CASE_CONVERT:
    6621                 :             :       /* Only the bits that contribute to the output matter.  Don't change
    6622                 :             :          the precision of the operation itself.  */
    6623                 :     1943396 :       operation_precision = precision;
    6624                 :     1943396 :       min_input_precision = stmt_info->min_output_precision;
    6625                 :     1943396 :       break;
    6626                 :             : 
    6627                 :      411071 :     case LSHIFT_EXPR:
    6628                 :      411071 :     case RSHIFT_EXPR:
    6629                 :      411071 :       {
    6630                 :      411071 :         tree shift = gimple_assign_rhs2 (stmt);
    6631                 :      411071 :         if (TREE_CODE (shift) != INTEGER_CST
    6632                 :      734746 :             || !wi::ltu_p (wi::to_widest (shift), precision))
    6633                 :       87432 :           return;
    6634                 :      323639 :         unsigned int const_shift = TREE_INT_CST_LOW (shift);
    6635                 :      323639 :         if (code == LSHIFT_EXPR)
    6636                 :             :           {
    6637                 :             :             /* Avoid creating an undefined shift.
    6638                 :             : 
    6639                 :             :                ??? We could instead use min_output_precision as-is and
    6640                 :             :                optimize out-of-range shifts to zero.  However, only
    6641                 :             :                degenerate testcases shift away all their useful input data,
    6642                 :             :                and it isn't natural to drop input operations in the middle
    6643                 :             :                of vectorization.  This sort of thing should really be
    6644                 :             :                handled before vectorization.  */
    6645                 :       79517 :             operation_precision = MAX (stmt_info->min_output_precision,
    6646                 :             :                                        const_shift + 1);
    6647                 :             :             /* We need CONST_SHIFT fewer bits of the input.  */
    6648                 :       79517 :             min_input_precision = (MAX (operation_precision, const_shift)
    6649                 :       79517 :                                    - const_shift);
    6650                 :             :           }
    6651                 :             :         else
    6652                 :             :           {
    6653                 :             :             /* We need CONST_SHIFT extra bits to do the operation.  */
    6654                 :      244122 :             operation_precision = (stmt_info->min_output_precision
    6655                 :             :                                    + const_shift);
    6656                 :      244122 :             min_input_precision = operation_precision;
    6657                 :             :           }
    6658                 :             :         break;
    6659                 :             :       }
    6660                 :             : 
    6661                 :     8143838 :     default:
    6662                 :     8143838 :       if (vect_truncatable_operation_p (code))
    6663                 :             :         {
    6664                 :             :           /* Input bit N has no effect on output bits N-1 and lower.  */
    6665                 :     4799307 :           operation_precision = stmt_info->min_output_precision;
    6666                 :     4799307 :           min_input_precision = operation_precision;
    6667                 :     4799307 :           break;
    6668                 :             :         }
    6669                 :             :       return;
    6670                 :             :     }
    6671                 :             : 
    6672                 :     7066342 :   if (operation_precision < precision)
    6673                 :             :     {
    6674                 :      100068 :       if (dump_enabled_p ())
    6675                 :        3521 :         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
    6676                 :             :                          " without affecting users: %G",
    6677                 :        3521 :                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
    6678                 :             :                          operation_precision, (gimple *) stmt);
    6679                 :      200136 :       vect_set_operation_type (stmt_info, type, operation_precision,
    6680                 :      100068 :                                TYPE_SIGN (type));
    6681                 :             :     }
    6682                 :     7066342 :   vect_set_min_input_precision (stmt_info, type, min_input_precision);
    6683                 :             : }
    6684                 :             : 
    6685                 :             : /* Return true if the statement described by STMT_INFO sets a boolean
    6686                 :             :    SSA_NAME and if we know how to vectorize this kind of statement using
    6687                 :             :    vector mask types.  */
    6688                 :             : 
    6689                 :             : static bool
    6690                 :    30529773 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
    6691                 :             : {
    6692                 :    30529773 :   tree lhs = gimple_get_lhs (stmt_info->stmt);
    6693                 :    30529773 :   tree_code code = ERROR_MARK;
    6694                 :    30529773 :   gassign *assign = NULL;
    6695                 :    30529773 :   gcond *cond = NULL;
    6696                 :             : 
    6697                 :    30529773 :   if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
    6698                 :    23312803 :     code = gimple_assign_rhs_code (assign);
    6699                 :    13431653 :   else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
    6700                 :             :     {
    6701                 :     3913811 :       lhs = gimple_cond_lhs (cond);
    6702                 :     3913811 :       code = gimple_cond_code (cond);
    6703                 :             :     }
    6704                 :             : 
    6705                 :    30529773 :   if (!lhs
    6706                 :    27984402 :       || TREE_CODE (lhs) != SSA_NAME
    6707                 :    54315033 :       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    6708                 :             :     return false;
    6709                 :             : 
    6710                 :     1733999 :   if (code != ERROR_MARK)
    6711                 :             :     {
    6712                 :     1499658 :       switch (code)
    6713                 :             :         {
    6714                 :             :         CASE_CONVERT:
    6715                 :             :         case SSA_NAME:
    6716                 :             :         case BIT_NOT_EXPR:
    6717                 :             :         case BIT_IOR_EXPR:
    6718                 :             :         case BIT_XOR_EXPR:
    6719                 :             :         case BIT_AND_EXPR:
    6720                 :             :           return true;
    6721                 :             : 
    6722                 :     1160861 :         default:
    6723                 :     1160861 :           return TREE_CODE_CLASS (code) == tcc_comparison;
    6724                 :             :         }
    6725                 :             :     }
    6726                 :      234341 :   else if (is_a <gphi *> (stmt_info->stmt))
    6727                 :      144685 :     return true;
    6728                 :             :   return false;
    6729                 :             : }
    6730                 :             : 
    6731                 :             : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
    6732                 :             :    a vector mask type instead of a normal vector type.  Record the
    6733                 :             :    result in STMT_INFO->mask_precision.  */
    6734                 :             : 
    6735                 :             : static void
    6736                 :    30529773 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
    6737                 :             : {
    6738                 :    30529773 :   if (!possible_vector_mask_operation_p (stmt_info))
    6739                 :             :     return;
    6740                 :             : 
    6741                 :             :   /* If at least one boolean input uses a vector mask type,
    6742                 :             :      pick the mask type with the narrowest elements.
    6743                 :             : 
    6744                 :             :      ??? This is the traditional behavior.  It should always produce
    6745                 :             :      the smallest number of operations, but isn't necessarily the
    6746                 :             :      optimal choice.  For example, if we have:
    6747                 :             : 
    6748                 :             :        a = b & c
    6749                 :             : 
    6750                 :             :      where:
    6751                 :             : 
    6752                 :             :        - the user of a wants it to have a mask type for 16-bit elements (M16)
    6753                 :             :        - b also uses M16
    6754                 :             :        - c uses a mask type for 8-bit elements (M8)
    6755                 :             : 
    6756                 :             :      then picking M8 gives:
    6757                 :             : 
    6758                 :             :        - 1 M16->M8 pack for b
    6759                 :             :        - 1 M8 AND for a
    6760                 :             :        - 2 M8->M16 unpacks for the user of a
    6761                 :             : 
    6762                 :             :      whereas picking M16 would have given:
    6763                 :             : 
    6764                 :             :        - 2 M8->M16 unpacks for c
    6765                 :             :        - 2 M16 ANDs for a
    6766                 :             : 
    6767                 :             :      The number of operations are equal, but M16 would have given
    6768                 :             :      a shorter dependency chain and allowed more ILP.  */
    6769                 :     1610750 :   unsigned int precision = ~0U;
    6770                 :     1610750 :   gimple *stmt = STMT_VINFO_STMT (stmt_info);
    6771                 :             : 
    6772                 :             :   /* If the statement compares two values that shouldn't use vector masks,
    6773                 :             :      try comparing the values as normal scalars instead.  */
    6774                 :     1610750 :   tree_code code = ERROR_MARK;
    6775                 :     1610750 :   tree op0_type;
    6776                 :     1610750 :   unsigned int nops = -1;
    6777                 :     1610750 :   unsigned int ops_start = 0;
    6778                 :             : 
    6779                 :     1610750 :   if (gassign *assign = dyn_cast <gassign *> (stmt))
    6780                 :             :     {
    6781                 :     1020343 :       code = gimple_assign_rhs_code (assign);
    6782                 :     1020343 :       op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
    6783                 :     1020343 :       nops = gimple_num_ops (assign);
    6784                 :     1020343 :       ops_start = 1;
    6785                 :             :     }
    6786                 :      590407 :   else if (gcond *cond = dyn_cast <gcond *> (stmt))
    6787                 :             :     {
    6788                 :      445722 :       code = gimple_cond_code (cond);
    6789                 :      445722 :       op0_type = TREE_TYPE (gimple_cond_lhs (cond));
    6790                 :      445722 :       nops = 2;
    6791                 :      445722 :       ops_start = 0;
    6792                 :             :     }
    6793                 :             : 
    6794                 :     1610750 :   if (code != ERROR_MARK)
    6795                 :             :     {
    6796                 :     4367077 :       for (unsigned int i = ops_start; i < nops; ++i)
    6797                 :             :         {
    6798                 :     2901012 :           tree rhs = gimple_op (stmt, i);
    6799                 :     2901012 :           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
    6800                 :     1373670 :             continue;
    6801                 :             : 
    6802                 :     1527342 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6803                 :     1527342 :           if (!def_stmt_info)
    6804                 :             :             /* Don't let external or constant operands influence the choice.
    6805                 :             :                We can convert them to whichever vector type we pick.  */
    6806                 :      456978 :             continue;
    6807                 :             : 
    6808                 :     1070364 :           if (def_stmt_info->mask_precision)
    6809                 :             :             {
    6810                 :      905320 :               if (precision > def_stmt_info->mask_precision)
    6811                 :     2901012 :                 precision = def_stmt_info->mask_precision;
    6812                 :             :             }
    6813                 :             :         }
    6814                 :             : 
    6815                 :     1466065 :       if (precision == ~0U
    6816                 :     1180607 :           && TREE_CODE_CLASS (code) == tcc_comparison)
    6817                 :             :         {
    6818                 :     1005994 :           scalar_mode mode;
    6819                 :     1005994 :           tree vectype, mask_type;
    6820                 :     1005994 :           if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
    6821                 :     1005994 :               && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
    6822                 :      860481 :               && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
    6823                 :      860481 :               && expand_vec_cmp_expr_p (vectype, mask_type, code))
    6824                 :     1329952 :             precision = GET_MODE_BITSIZE (mode);
    6825                 :             :         }
    6826                 :             :     }
    6827                 :             :   else
    6828                 :             :     {
    6829                 :      144685 :       gphi *phi = as_a <gphi *> (stmt_info->stmt);
    6830                 :      524680 :       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
    6831                 :             :         {
    6832                 :      379995 :           tree rhs = gimple_phi_arg_def (phi, i);
    6833                 :             : 
    6834                 :      379995 :           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
    6835                 :      379995 :           if (!def_stmt_info)
    6836                 :             :             /* Don't let external or constant operands influence the choice.
    6837                 :             :                We can convert them to whichever vector type we pick.  */
    6838                 :      228877 :             continue;
    6839                 :             : 
    6840                 :      151118 :           if (def_stmt_info->mask_precision)
    6841                 :             :             {
    6842                 :      130506 :               if (precision > def_stmt_info->mask_precision)
    6843                 :      379995 :                 precision = def_stmt_info->mask_precision;
    6844                 :             :             }
    6845                 :             :         }
    6846                 :             :     }
    6847                 :             : 
    6848                 :     1610750 :   if (dump_enabled_p ())
    6849                 :             :     {
    6850                 :        6541 :       if (precision == ~0U)
    6851                 :        1633 :         dump_printf_loc (MSG_NOTE, vect_location,
    6852                 :             :                          "using normal nonmask vectors for %G",
    6853                 :             :                          stmt_info->stmt);
    6854                 :             :       else
    6855                 :        4908 :         dump_printf_loc (MSG_NOTE, vect_location,
    6856                 :             :                          "using boolean precision %d for %G",
    6857                 :             :                          precision, stmt_info->stmt);
    6858                 :             :     }
    6859                 :             : 
    6860                 :     1610750 :   stmt_info->mask_precision = precision;
    6861                 :             : }
    6862                 :             : 
    6863                 :             : /* Handle vect_determine_precisions for STMT_INFO, given that we
    6864                 :             :    have already done so for the users of its result.  */
    6865                 :             : 
    6866                 :             : void
    6867                 :    30529773 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
    6868                 :             : {
    6869                 :    30529773 :   vect_determine_min_output_precision (vinfo, stmt_info);
    6870                 :    30529773 :   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    6871                 :             :     {
    6872                 :    17098120 :       vect_determine_precisions_from_range (stmt_info, stmt);
    6873                 :    17098120 :       vect_determine_precisions_from_users (stmt_info, stmt);
    6874                 :             :     }
    6875                 :    30529773 : }
    6876                 :             : 
    6877                 :             : /* Walk backwards through the vectorizable region to determine the
    6878                 :             :    values of these fields:
    6879                 :             : 
    6880                 :             :    - min_output_precision
    6881                 :             :    - min_input_precision
    6882                 :             :    - operation_precision
    6883                 :             :    - operation_sign.  */
    6884                 :             : 
    6885                 :             : void
    6886                 :      858521 : vect_determine_precisions (vec_info *vinfo)
    6887                 :             : {
    6888                 :      858521 :   DUMP_VECT_SCOPE ("vect_determine_precisions");
    6889                 :             : 
    6890                 :      858521 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    6891                 :             :     {
    6892                 :      284010 :       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    6893                 :      284010 :       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
    6894                 :      284010 :       unsigned int nbbs = loop->num_nodes;
    6895                 :             : 
    6896                 :      985251 :       for (unsigned int i = 0; i < nbbs; i++)
    6897                 :             :         {
    6898                 :      701241 :           basic_block bb = bbs[i];
    6899                 :      701241 :           for (auto gsi = gsi_start_phis (bb);
    6900                 :     1455770 :                !gsi_end_p (gsi); gsi_next (&gsi))
    6901                 :             :             {
    6902                 :      754529 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6903                 :      754529 :               if (stmt_info)
    6904                 :      754529 :                 vect_determine_mask_precision (vinfo, stmt_info);
    6905                 :             :             }
    6906                 :     5350893 :           for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    6907                 :     3948411 :             if (!is_gimple_debug (gsi_stmt (si)))
    6908                 :     2881246 :               vect_determine_mask_precision
    6909                 :     2881246 :                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
    6910                 :             :         }
    6911                 :      985251 :       for (unsigned int i = 0; i < nbbs; i++)
    6912                 :             :         {
    6913                 :      701241 :           basic_block bb = bbs[nbbs - i - 1];
    6914                 :      701241 :           for (gimple_stmt_iterator si = gsi_last_bb (bb);
    6915                 :     8598063 :                !gsi_end_p (si); gsi_prev (&si))
    6916                 :     3948411 :             if (!is_gimple_debug (gsi_stmt (si)))
    6917                 :     2881246 :               vect_determine_stmt_precisions
    6918                 :     2881246 :                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
    6919                 :      701241 :           for (auto gsi = gsi_start_phis (bb);
    6920                 :     1455770 :                !gsi_end_p (gsi); gsi_next (&gsi))
    6921                 :             :             {
    6922                 :      754529 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6923                 :      754529 :               if (stmt_info)
    6924                 :      754529 :                 vect_determine_stmt_precisions (vinfo, stmt_info);
    6925                 :             :             }
    6926                 :             :         }
    6927                 :             :     }
    6928                 :             :   else
    6929                 :             :     {
    6930                 :      574511 :       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
    6931                 :    18228792 :       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
    6932                 :             :         {
    6933                 :     8539885 :           basic_block bb = bb_vinfo->bbs[i];
    6934                 :    13582734 :           for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6935                 :             :             {
    6936                 :     5042849 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6937                 :     5042849 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6938                 :     4864264 :                 vect_determine_mask_precision (vinfo, stmt_info);
    6939                 :             :             }
    6940                 :    80764028 :           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    6941                 :             :             {
    6942                 :    63684258 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6943                 :    63684258 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6944                 :    22029734 :                 vect_determine_mask_precision (vinfo, stmt_info);
    6945                 :             :             }
    6946                 :             :         }
    6947                 :     9114396 :       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
    6948                 :             :         {
    6949                 :     8539885 :           for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
    6950                 :   135908401 :                !gsi_end_p (gsi); gsi_prev (&gsi))
    6951                 :             :             {
    6952                 :    63684258 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
    6953                 :    63684258 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6954                 :    22029734 :                 vect_determine_stmt_precisions (vinfo, stmt_info);
    6955                 :             :             }
    6956                 :     8539885 :           for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
    6957                 :    13582734 :                !gsi_end_p (gsi); gsi_next (&gsi))
    6958                 :             :             {
    6959                 :     5042849 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
    6960                 :     5042849 :               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
    6961                 :     4864264 :                 vect_determine_stmt_precisions (vinfo, stmt_info);
    6962                 :             :             }
    6963                 :             :         }
    6964                 :             :     }
    6965                 :      858521 : }
    6966                 :             : 
    6967                 :             : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
    6968                 :             : 
    6969                 :             : struct vect_recog_func
    6970                 :             : {
    6971                 :             :   vect_recog_func_ptr fn;
    6972                 :             :   const char *name;
    6973                 :             : };
    6974                 :             : 
    6975                 :             : /* Note that ordering matters - the first pattern matching on a stmt is
    6976                 :             :    taken which means usually the more complex one needs to preceed the
    6977                 :             :    less comples onex (widen_sum only after dot_prod or sad for example).  */
    6978                 :             : static vect_recog_func vect_vect_recog_func_ptrs[] = {
    6979                 :             :   { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
    6980                 :             :   { vect_recog_bit_insert_pattern, "bit_insert" },
    6981                 :             :   { vect_recog_abd_pattern, "abd" },
    6982                 :             :   { vect_recog_over_widening_pattern, "over_widening" },
    6983                 :             :   /* Must come after over_widening, which narrows the shift as much as
    6984                 :             :      possible beforehand.  */
    6985                 :             :   { vect_recog_average_pattern, "average" },
    6986                 :             :   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
    6987                 :             :   { vect_recog_mulhs_pattern, "mult_high" },
    6988                 :             :   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
    6989                 :             :   { vect_recog_widen_mult_pattern, "widen_mult" },
    6990                 :             :   { vect_recog_dot_prod_pattern, "dot_prod" },
    6991                 :             :   { vect_recog_sad_pattern, "sad" },
    6992                 :             :   { vect_recog_widen_sum_pattern, "widen_sum" },
    6993                 :             :   { vect_recog_pow_pattern, "pow" },
    6994                 :             :   { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
    6995                 :             :   { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
    6996                 :             :   { vect_recog_widen_shift_pattern, "widen_shift" },
    6997                 :             :   { vect_recog_rotate_pattern, "rotate" },
    6998                 :             :   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
    6999                 :             :   { vect_recog_divmod_pattern, "divmod" },
    7000                 :             :   { vect_recog_mult_pattern, "mult" },
    7001                 :             :   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
    7002                 :             :   { vect_recog_gcond_pattern, "gcond" },
    7003                 :             :   { vect_recog_bool_pattern, "bool" },
    7004                 :             :   /* This must come before mask conversion, and includes the parts
    7005                 :             :      of mask conversion that are needed for gather and scatter
    7006                 :             :      internal functions.  */
    7007                 :             :   { vect_recog_gather_scatter_pattern, "gather_scatter" },
    7008                 :             :   { vect_recog_mask_conversion_pattern, "mask_conversion" },
    7009                 :             :   { vect_recog_widen_plus_pattern, "widen_plus" },
    7010                 :             :   { vect_recog_widen_minus_pattern, "widen_minus" },
    7011                 :             :   { vect_recog_widen_abd_pattern, "widen_abd" },
    7012                 :             :   /* These must come after the double widening ones.  */
    7013                 :             : };
    7014                 :             : 
    7015                 :             : const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
    7016                 :             : 
    7017                 :             : /* Mark statements that are involved in a pattern.  */
    7018                 :             : 
    7019                 :             : void
    7020                 :      617391 : vect_mark_pattern_stmts (vec_info *vinfo,
    7021                 :             :                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
    7022                 :             :                          tree pattern_vectype)
    7023                 :             : {
    7024                 :      617391 :   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
    7025                 :      617391 :   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7026                 :             : 
    7027                 :      617391 :   gimple *orig_pattern_stmt = NULL;
    7028                 :      617391 :   if (is_pattern_stmt_p (orig_stmt_info))
    7029                 :             :     {
    7030                 :             :       /* We're replacing a statement in an existing pattern definition
    7031                 :             :          sequence.  */
    7032                 :        7680 :       orig_pattern_stmt = orig_stmt_info->stmt;
    7033                 :        7680 :       if (dump_enabled_p ())
    7034                 :         496 :         dump_printf_loc (MSG_NOTE, vect_location,
    7035                 :             :                          "replacing earlier pattern %G", orig_pattern_stmt);
    7036                 :             : 
    7037                 :             :       /* To keep the book-keeping simple, just swap the lhs of the
    7038                 :             :          old and new statements, so that the old one has a valid but
    7039                 :             :          unused lhs.  */
    7040                 :        7680 :       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
    7041                 :        7680 :       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
    7042                 :        7680 :       gimple_set_lhs (pattern_stmt, old_lhs);
    7043                 :             : 
    7044                 :        7680 :       if (dump_enabled_p ())
    7045                 :         496 :         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
    7046                 :             : 
    7047                 :             :       /* Switch to the statement that ORIG replaces.  */
    7048                 :        7680 :       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
    7049                 :             : 
    7050                 :             :       /* We shouldn't be replacing the main pattern statement.  */
    7051                 :        7680 :       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
    7052                 :             :                   != orig_pattern_stmt);
    7053                 :             :     }
    7054                 :             : 
    7055                 :      617391 :   if (def_seq)
    7056                 :             :     for (gimple_stmt_iterator si = gsi_start (def_seq);
    7057                 :     1270567 :          !gsi_end_p (si); gsi_next (&si))
    7058                 :             :       {
    7059                 :      764491 :         if (dump_enabled_p ())
    7060                 :       20732 :           dump_printf_loc (MSG_NOTE, vect_location,
    7061                 :             :                            "extra pattern stmt: %G", gsi_stmt (si));
    7062                 :      764491 :         stmt_vec_info pattern_stmt_info
    7063                 :      764491 :           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
    7064                 :             :                                     orig_stmt_info, pattern_vectype);
    7065                 :             :         /* Stmts in the def sequence are not vectorizable cycle or
    7066                 :             :            induction defs, instead they should all be vect_internal_def
    7067                 :             :            feeding the main pattern stmt which retains this def type.  */
    7068                 :      764491 :         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
    7069                 :             :       }
    7070                 :             : 
    7071                 :      617391 :   if (orig_pattern_stmt)
    7072                 :             :     {
    7073                 :        7680 :       vect_init_pattern_stmt (vinfo, pattern_stmt,
    7074                 :             :                               orig_stmt_info, pattern_vectype);
    7075                 :             : 
    7076                 :             :       /* Insert all the new pattern statements before the original one.  */
    7077                 :        7680 :       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
    7078                 :        7680 :       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
    7079                 :             :                                                orig_def_seq);
    7080                 :        7680 :       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
    7081                 :        7680 :       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
    7082                 :             : 
    7083                 :             :       /* Remove the pattern statement that this new pattern replaces.  */
    7084                 :        7680 :       gsi_remove (&gsi, false);
    7085                 :             :     }
    7086                 :             :   else
    7087                 :      609711 :     vect_set_pattern_stmt (vinfo,
    7088                 :             :                            pattern_stmt, orig_stmt_info, pattern_vectype);
    7089                 :             : 
    7090                 :             :   /* For any conditionals mark them as vect_condition_def.  */
    7091                 :      617391 :   if (is_a <gcond *> (pattern_stmt))
    7092                 :      197904 :     STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
    7093                 :             : 
    7094                 :             :   /* Transfer reduction path info to the pattern.  */
    7095                 :      617391 :   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    7096                 :             :     {
    7097                 :        3586 :       gimple_match_op op;
    7098                 :        3586 :       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
    7099                 :           0 :         gcc_unreachable ();
    7100                 :        3586 :       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
    7101                 :             :       /* Search the pattern def sequence and the main pattern stmt.  Note
    7102                 :             :          we may have inserted all into a containing pattern def sequence
    7103                 :             :          so the following is a bit awkward.  */
    7104                 :        3586 :       gimple_stmt_iterator si;
    7105                 :        3586 :       gimple *s;
    7106                 :        3586 :       if (def_seq)
    7107                 :             :         {
    7108                 :        2740 :           si = gsi_start (def_seq);
    7109                 :        2740 :           s = gsi_stmt (si);
    7110                 :        2740 :           gsi_next (&si);
    7111                 :             :         }
    7112                 :             :       else
    7113                 :             :         {
    7114                 :             :           si = gsi_none ();
    7115                 :             :           s = pattern_stmt;
    7116                 :             :         }
    7117                 :        6437 :       do
    7118                 :             :         {
    7119                 :        6437 :           bool found = false;
    7120                 :        6437 :           if (gimple_extract_op (s, &op))
    7121                 :       14027 :             for (unsigned i = 0; i < op.num_ops; ++i)
    7122                 :       11329 :               if (op.ops[i] == lookfor)
    7123                 :             :                 {
    7124                 :        3739 :                   STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
    7125                 :        3739 :                   lookfor = gimple_get_lhs (s);
    7126                 :        3739 :                   found = true;
    7127                 :        3739 :                   break;
    7128                 :             :                 }
    7129                 :        6437 :           if (s == pattern_stmt)
    7130                 :             :             {
    7131                 :        3586 :               if (!found && dump_enabled_p ())
    7132                 :           0 :                 dump_printf_loc (MSG_NOTE, vect_location,
    7133                 :             :                                  "failed to update reduction index.\n");
    7134                 :        3586 :               break;
    7135                 :             :             }
    7136                 :        2851 :           if (gsi_end_p (si))
    7137                 :             :             s = pattern_stmt;
    7138                 :             :           else
    7139                 :             :             {
    7140                 :         111 :               s = gsi_stmt (si);
    7141                 :         111 :               if (s == pattern_stmt)
    7142                 :             :                 /* Found the end inside a bigger pattern def seq.  */
    7143                 :             :                 si = gsi_none ();
    7144                 :             :               else
    7145                 :         111 :                 gsi_next (&si);
    7146                 :             :             }
    7147                 :             :         } while (1);
    7148                 :             :     }
    7149                 :      617391 : }
    7150                 :             : 
    7151                 :             : /* Function vect_pattern_recog_1
    7152                 :             : 
    7153                 :             :    Input:
    7154                 :             :    RECOG_FUNC: A pattern descriptor whose FN member detects a certain
    7155                 :             :         computation pattern; its NAME member is used in dump output.
    7156                 :             :    STMT_INFO: A stmt from which the pattern search should start.
    7157                 :             : 
    7158                 :             :    If RECOG_FUNC->fn successfully detected the pattern, it creates
    7159                 :             :    a sequence of statements that has the same functionality and can be
    7160                 :             :    used to replace STMT_INFO.  It returns the last statement in the sequence
    7161                 :             :    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
    7162                 :             :    RECOG_FUNC->fn also sets *TYPE_OUT to the vector type of the final
    7163                 :             :    statement, having first checked that the target supports the new operation
    7164                 :             :    in that type.
    7165                 :             : 
    7166                 :             :    This function also does some bookkeeping, as explained in the documentation
    7167                 :             :    for vect_pattern_recog.  */
    7168                 :             : 
    7169                 :             : static void
    7170                 :   707336510 : vect_pattern_recog_1 (vec_info *vinfo,
    7171                 :             :                       vect_recog_func *recog_func, stmt_vec_info stmt_info)
    7172                 :             : {
    7173                 :   707336510 :   gimple *pattern_stmt;
    7174                 :   707336510 :   loop_vec_info loop_vinfo;
    7175                 :   707336510 :   tree pattern_vectype;
    7176                 :             : 
    7177                 :             :   /* If this statement has already been replaced with pattern statements,
    7178                 :             :      leave the original statement alone, since the first match wins.
    7179                 :             :      Instead try to match against the definition statements that feed
    7180                 :             :      the main pattern statement.  */
    7181                 :   707336510 :   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    7182                 :             :     {
    7183                 :     7320441 :       gimple_stmt_iterator gsi;
    7184                 :     7320441 :       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7185                 :    17149511 :            !gsi_end_p (gsi); gsi_next (&gsi))
    7186                 :     9829070 :         vect_pattern_recog_1 (vinfo, recog_func,
    7187                 :             :                               vinfo->lookup_stmt (gsi_stmt (gsi)));
    7188                 :             :       return;
    7189                 :             :     }
    7190                 :             : 
    7191                 :   700016069 :   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
    7192                 :   700016069 :   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
    7193                 :   700016069 :   if (!pattern_stmt)
    7194                 :             :     {
    7195                 :             :       /* Clear any half-formed pattern definition sequence.  */
    7196                 :   699398678 :       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
    7197                 :   699398678 :       return;
    7198                 :             :     }
    7199                 :             : 
    7200                 :      617391 :   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    7201                 :             :  
    7202                 :             :   /* Found a vectorizable pattern.  */
    7203                 :      617391 :   if (dump_enabled_p ())
    7204                 :       17148 :     dump_printf_loc (MSG_NOTE, vect_location,
    7205                 :             :                      "%s pattern recognized: %G",
    7206                 :             :                      recog_func->name, pattern_stmt);
    7207                 :             : 
    7208                 :             :   /* Mark the stmts that are involved in the pattern. */
    7209                 :      617391 :   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
    7210                 :             : 
    7211                 :             :   /* Drop STMT_INFO from LOOP_VINFO_REDUCTIONS: patterns cannot be vectorized
    7212                 :             :      using SLP, because they change the order of computation.  */
    7213                 :      617391 :   if (loop_vinfo)
    7214                 :             :     {
    7215                 :      278034 :       unsigned ix, ix2;
    7216                 :      278034 :       stmt_vec_info *elem_ptr;
    7217                 :      324526 :       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
    7218                 :             :                              elem_ptr, *elem_ptr == stmt_info);
    7219                 :             :     }
    7220                 :             : }
    7221                 :             : 
    7222                 :             : 
    7223                 :             : /* Function vect_pattern_recog
    7224                 :             : 
    7225                 :             :    Input:
    7226                 :             :    VINFO - the vec_info of the loop or basic-block region in which we want to
    7227                 :             :         look for computation idioms.
    7228                 :             : 
    7229                 :             :    Output - for each computation idiom that is detected we create a new stmt
    7230                 :             :         that provides the same functionality and that can be vectorized.  We
    7231                 :             :         also record some information in the struct_stmt_info of the relevant
    7232                 :             :         stmts, as explained below:
    7233                 :             : 
    7234                 :             :    At the entry to this function we have the following stmts, with the
    7235                 :             :    following initial value in the STMT_VINFO fields:
    7236                 :             : 
    7237                 :             :          stmt                     in_pattern_p  related_stmt    vec_stmt
    7238                 :             :          S1: a_i = ....                 -       -               -
    7239                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7240                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7241                 :             :          S4: a_0 = ..use(a_1)..         -       -               -
    7242                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7243                 :             : 
    7244                 :             :    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
    7245                 :             :    represented by a single stmt.  We then:
    7246                 :             :    - create a new stmt S6 equivalent to the pattern (the stmt is not
    7247                 :             :      inserted into the code)
    7248                 :             :    - fill in the STMT_VINFO fields as follows:
    7249                 :             : 
    7250                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7251                 :             :          S1: a_i = ....                 -       -               -
    7252                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7253                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7254                 :             :          S4: a_0 = ..use(a_1)..         true    S6              -
    7255                 :             :           '---> S6: a_new = ....        -       S4              -
    7256                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7257                 :             : 
    7258                 :             :    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
    7259                 :             :    to each other through the RELATED_STMT field).
    7260                 :             : 
    7261                 :             :    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    7262                 :             :    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    7263                 :             :    remain irrelevant unless used by stmts other than S4.
    7264                 :             : 
    7265                 :             :    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
    7266                 :             :    (because they are marked as irrelevant).  It will vectorize S6, and record
    7267                 :             :    a pointer to the new vector stmt VS6 from S6 (as usual).
    7268                 :             :    S4 will be skipped, and S5 will be vectorized as usual:
    7269                 :             : 
    7270                 :             :                                   in_pattern_p  related_stmt    vec_stmt
    7271                 :             :          S1: a_i = ....                 -       -               -
    7272                 :             :          S2: a_2 = ..use(a_i)..         -       -               -
    7273                 :             :          S3: a_1 = ..use(a_2)..         -       -               -
    7274                 :             :        > VS6: va_new = ....             -       -               -
    7275                 :             :          S4: a_0 = ..use(a_1)..         true    S6              VS6
    7276                 :             :           '---> S6: a_new = ....        -       S4              VS6
    7277                 :             :        > VS5: ... = ..vuse(va_new)..    -       -               -
    7278                 :             :          S5: ... = ..use(a_0)..         -       -               -
    7279                 :             : 
    7280                 :             :    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    7281                 :             :    elsewhere), and we'll end up with:
    7282                 :             : 
    7283                 :             :         VS6: va_new = ....
    7284                 :             :         VS5: ... = ..vuse(va_new)..
    7285                 :             : 
    7286                 :             :    In case of more than one pattern statement, e.g., widen-mult with
    7287                 :             :    intermediate type:
    7288                 :             : 
    7289                 :             :      S1  a_t = ;
    7290                 :             :      S2  a_T = (TYPE) a_t;
    7291                 :             :            '--> S3: a_it = (interm_type) a_t;
    7292                 :             :      S4  prod_T = a_T * CONST;
    7293                 :             :            '--> S5: prod_T' = a_it w* CONST;
    7294                 :             : 
    7295                 :             :    there may be other users of a_T outside the pattern.  In that case S2 will
    7296                 :             :    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
    7297                 :             :    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
    7298                 :             :    be recorded in S3.  */
    7299                 :             : 
    7300                 :             : void
    7301                 :      858521 : vect_pattern_recog (vec_info *vinfo)
    7302                 :             : {
    7303                 :      858521 :   class loop *loop;
    7304                 :      858521 :   basic_block *bbs;
    7305                 :      858521 :   unsigned int nbbs;
    7306                 :      858521 :   gimple_stmt_iterator si;
    7307                 :      858521 :   unsigned int i, j;
    7308                 :             : 
    7309                 :      858521 :   vect_determine_precisions (vinfo);
    7310                 :             : 
    7311                 :      858521 :   DUMP_VECT_SCOPE ("vect_pattern_recog");
    7312                 :             : 
    7313                 :      858521 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    7314                 :             :     {
    7315                 :      284010 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    7316                 :      284010 :       bbs = LOOP_VINFO_BBS (loop_vinfo);
    7317                 :      284010 :       nbbs = loop->num_nodes;
    7318                 :             : 
    7319                 :             :       /* Scan through the loop stmts, applying the pattern recognition
    7320                 :             :          functions starting at each stmt visited:  */
    7321                 :      985251 :       for (i = 0; i < nbbs; i++)
    7322                 :             :         {
    7323                 :      701241 :           basic_block bb = bbs[i];
    7324                 :     5350893 :           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    7325                 :             :             {
    7326                 :     3948411 :               if (is_gimple_debug (gsi_stmt (si)))
    7327                 :     1067165 :                 continue;
    7328                 :     2881246 :               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
    7329                 :             :               /* Scan over all generic vect_recog_xxx_pattern functions.  */
    7330                 :    86437380 :               for (j = 0; j < NUM_PATTERNS; j++)
    7331                 :    80674888 :                 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
    7332                 :             :                                       stmt_info);
    7333                 :             :             }
    7334                 :             :         }
    7335                 :             :     }
    7336                 :             :   else
    7337                 :             :     {
    7338                 :      574511 :       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
    7339                 :    18228792 :       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
    7340                 :    17079770 :         for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
    7341                 :    72224143 :              !gsi_end_p (gsi); gsi_next (&gsi))
    7342                 :             :           {
    7343                 :    63684258 :             stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
    7344                 :    63684258 :             if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
    7345                 :    41654524 :               continue;
    7346                 :             : 
    7347                 :             :             /* Scan over all generic vect_recog_xxx_pattern functions.  */
    7348                 :   638862286 :             for (j = 0; j < NUM_PATTERNS; j++)
    7349                 :   616832552 :               vect_pattern_recog_1 (vinfo,
    7350                 :             :                                     &vect_vect_recog_func_ptrs[j], stmt_info);
    7351                 :             :           }
    7352                 :             :     }
    7353                 :             : 
    7354                 :             :   /* After this no more add_stmt calls are allowed.  */
    7355                 :      858521 :   vinfo->stmt_vec_info_ro = true;
    7356                 :      858521 : }
    7357                 :             : 
    7358                 :             : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL that sets LHS from OP0 and OP1, using
    7359                 :             :    the tree_code or internal_fn contained in CH, respectively.  */
    7360                 :             : gimple *
    7361                 :      111911 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
    7362                 :             : {
    7363                 :      111911 :   gcc_assert (op0 != NULL_TREE);
    7364                 :      111911 :   if (ch.is_tree_code ())
    7365                 :      111911 :     return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
    7366                 :             : 
    7367                 :           0 :   gcc_assert (ch.is_internal_fn ());
    7368                 :           0 :   gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
    7369                 :             :                                              op1 == NULL_TREE ? 1 : 2,
    7370                 :             :                                              op0, op1);
    7371                 :           0 :   gimple_call_set_lhs (stmt, lhs);
    7372                 :           0 :   return stmt;
    7373                 :             : }
        

Generated by: LCOV version 2.0-1

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.