LCOV - code coverage report
Current view: top level - gcc - tree-vectorizer.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.9 % 939 835
Test Date: 2026-06-20 15:32:29 Functions: 89.2 % 65 58
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : /* Loop and basic block vectorizer.
      22              : 
      23              :   This file contains drivers for the three vectorizers:
      24              :   (1) loop vectorizer (inter-iteration parallelism),
      25              :   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      26              :       vectorizer)
      27              :   (3) BB vectorizer (out-of-loops), aka SLP
      28              : 
      29              :   The rest of the vectorizer's code is organized as follows:
      30              :   - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
      31              :     used by drivers (1) and (2).
      32              :   - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
      33              :     drivers (1) and (2).
      34              :   - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
      35              :     used by drivers (2) and (3).
      36              :   - tree-vect-stmts.cc - statements analysis and transformation (used by all).
      37              :   - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
      38              :     manipulations (used by all).
      39              :   - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
      40              : 
      41              :   Here's a poor attempt at illustrating that:
      42              : 
      43              :      tree-vectorizer.cc:
      44              :      loop_vect()  loop_aware_slp()  slp_vect()
      45              :           |        /           \          /
      46              :           |       /             \        /
      47              :           tree-vect-loop.cc  tree-vect-slp.cc
      48              :                 | \      \  /      /   |
      49              :                 |  \      \/      /    |
      50              :                 |   \     /\     /     |
      51              :                 |    \   /  \   /      |
      52              :          tree-vect-stmts.cc  tree-vect-data-refs.cc
      53              :                        \      /
      54              :                     tree-vect-patterns.cc
      55              : */
      56              : 
      57              : #include "config.h"
      58              : #include "system.h"
      59              : #include "coretypes.h"
      60              : #include "backend.h"
      61              : #include "tree.h"
      62              : #include "gimple.h"
      63              : #include "predict.h"
      64              : #include "tree-pass.h"
      65              : #include "ssa.h"
      66              : #include "cgraph.h"
      67              : #include "fold-const.h"
      68              : #include "stor-layout.h"
      69              : #include "gimple-iterator.h"
      70              : #include "gimple-walk.h"
      71              : #include "tree-ssa-loop-manip.h"
      72              : #include "tree-ssa-loop-niter.h"
      73              : #include "tree-cfg.h"
      74              : #include "cfgloop.h"
      75              : #include "tree-vectorizer.h"
      76              : #include "tree-ssa-propagate.h"
      77              : #include "dbgcnt.h"
      78              : #include "tree-scalar-evolution.h"
      79              : #include "stringpool.h"
      80              : #include "attribs.h"
      81              : #include "gimple-pretty-print.h"
      82              : #include "opt-problem.h"
      83              : #include "internal-fn.h"
      84              : #include "tree-ssa-sccvn.h"
      85              : #include "tree-into-ssa.h"
      86              : #include "gimple-range.h"
      87              : 
      88              : /* Loop or bb location, with hotness information.
                           File-scope global; the destructor of auto_purge_vect_location
                           in this file resets it to a default-constructed value.  */
      89              : dump_user_location_t vect_location;
      90              : 
      91              : /* auto_purge_vect_location's dtor: reset the vect_location
      92              :    global, to avoid stale location_t values that could reference
      93              :    GC-ed blocks.  */
      94              : 
      95      1371038 : auto_purge_vect_location::~auto_purge_vect_location ()
      96              : {
                          /* Overwrite the global with a default-constructed location.  */
      97      1371038 :   vect_location = dump_user_location_t ();
      98      1371038 : }
      99              : 
     100              : /* Dump a cost entry according to args to F.

                           The entry is one line written with fprintf, in this order:
                           - the statement of STMT_INFO printed with TDF_SLIM; or, when
                             STMT_INFO is null, the pointer value of NODE; or an
                             unknown marker when both are null;
                           - COUNT followed by the word times;
                           - the name of KIND (the text unknown if no case matches);
                           - MISALIGN, only for unaligned_load and unaligned_store;
                           - COST;
                           - prologue, body or epilogue according to WHERE (the text
                             unknown if no case matches).
                           The unnamed tree parameter is not used.  */
     101              : 
     102              : void
     103       219989 : dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
     104              :                 stmt_vec_info stmt_info, slp_tree node, tree,
     105              :                 int misalign, unsigned cost,
     106              :                 enum vect_cost_model_location where)
     107              : {
                          /* Identify what is being costed: statement, SLP node, or neither.  */
     108       219989 :   if (stmt_info)
     109              :     {
     110       202587 :       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
     111       202587 :       fprintf (f, " ");
     112              :     }
     113        17402 :   else if (node)
     114         3493 :     fprintf (f, "node %p ", (void *)node);
     115              :   else
     116        13909 :     fprintf (f, "<unknown> ");
     117       219989 :   fprintf (f, "%d times ", count);
                          /* Default name, kept when KIND matches none of the cases below.  */
     118       219989 :   const char *ks = "unknown";
     119       219989 :   switch (kind)
     120              :     {
     121        49593 :     case scalar_stmt:
     122        49593 :       ks = "scalar_stmt";
     123        49593 :       break;
     124        37308 :     case scalar_load:
     125        37308 :       ks = "scalar_load";
     126        37308 :       break;
     127        28119 :     case scalar_store:
     128        28119 :       ks = "scalar_store";
     129        28119 :       break;
     130        34994 :     case vector_stmt:
     131        34994 :       ks = "vector_stmt";
     132        34994 :       break;
     133        22101 :     case vector_load:
     134        22101 :       ks = "vector_load";
     135        22101 :       break;
     136            0 :     case vector_gather_load:
     137            0 :       ks = "vector_gather_load";
     138            0 :       break;
     139         7876 :     case unaligned_load:
     140         7876 :       ks = "unaligned_load";
     141         7876 :       break;
     142         4666 :     case unaligned_store:
     143         4666 :       ks = "unaligned_store";
     144         4666 :       break;
     145         8891 :     case vector_store:
     146         8891 :       ks = "vector_store";
     147         8891 :       break;
     148            0 :     case vector_scatter_store:
     149            0 :       ks = "vector_scatter_store";
     150            0 :       break;
     151         2265 :     case vec_to_scalar:
     152         2265 :       ks = "vec_to_scalar";
     153         2265 :       break;
     154         9433 :     case scalar_to_vec:
     155         9433 :       ks = "scalar_to_vec";
     156         9433 :       break;
     157           12 :     case cond_branch_not_taken:
     158           12 :       ks = "cond_branch_not_taken";
     159           12 :       break;
     160          544 :     case cond_branch_taken:
     161          544 :       ks = "cond_branch_taken";
     162          544 :       break;
     163         6518 :     case vec_perm:
     164         6518 :       ks = "vec_perm";
     165         6518 :       break;
     166         5418 :     case vec_promote_demote:
     167         5418 :       ks = "vec_promote_demote";
     168         5418 :       break;
     169         1129 :     case vec_construct:
     170         1129 :       ks = "vec_construct";
     171         1129 :       break;
     172         1122 :     case vec_deconstruct:
     173         1122 :       ks = "vec_deconstruct";
     174         1122 :       break;
     175              :     }
     176       219989 :   fprintf (f, "%s ", ks);
                          /* The misalignment is only printed for the two unaligned kinds.  */
     177       219989 :   if (kind == unaligned_load || kind == unaligned_store)
     178        12542 :     fprintf (f, "(misalign %d) ", misalign);
     179       219989 :   fprintf (f, "costs %u ", cost);
                          /* Default name, kept when WHERE matches none of the cases below.  */
     180       219989 :   const char *ws = "unknown";
     181       219989 :   switch (where)
     182              :     {
     183        23306 :     case vect_prologue:
     184        23306 :       ws = "prologue";
     185        23306 :       break;
     186       189229 :     case vect_body:
     187       189229 :       ws = "body";
     188       189229 :       break;
     189         7454 :     case vect_epilogue:
     190         7454 :       ws = "epilogue";
     191         7454 :       break;
     192              :     }
     193       219989 :   fprintf (f, "in %s\n", ws);
     194       219989 : }
     195              : 
     196              : /* For mapping simduid to vectorization factor.
                           Entries are keyed on SIMDUID alone: both hash and equal below
                           look only at that field.  */
     197              : 
     198              : class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
     199              : {
     200              : public:
                          /* Key.  Lookups in this file fill it from the DECL_UID of the
                             simduid variable.  */
     201              :   unsigned int simduid;
                          /* Vectorization factor associated with SIMDUID.  */
     202              :   poly_uint64 vf;
     203              : 
     204              :   /* hash_table support.  */
     205              :   static inline hashval_t hash (const simduid_to_vf *);
     206              :   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
     207              : };
     208              : 
     209              : inline hashval_t
     210         7664 : simduid_to_vf::hash (const simduid_to_vf *p)
     211              : {
                          /* The simduid of P is used directly as the hash value.  */
     212         7664 :   return p->simduid;
     213              : }
     214              : 
     215              : inline int
     216        14168 : simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
     217              : {
                          /* Two entries match when their simduids match; vf is ignored.  */
     218        14168 :   return p1->simduid == p2->simduid;
     219              : }
     220              : 
     221              : /* This hash maps the OMP simd array to the corresponding simduid used
     222              :    to index into it.  Like thus,
     223              : 
     224              :         _7 = GOMP_SIMD_LANE (simduid.0)
     225              :         ...
     226              :         ...
     227              :         D.1737[_7] = stuff;
     228              : 
     229              : 
     230              :    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
     231              :    simduid.0.  */
     232              : 
     233              : struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
     234              : {
                          /* The simd array variable.  This is the key: hash and equal
                             below use only this field.  */
     235              :   tree decl;
                          /* DECL_UID of the simduid, or -1U once the same array has been
                             seen with two different simduids (set by
                             note_simd_array_uses_cb, skipped by shrink_simd_arrays).  */
     236              :   unsigned int simduid;
     237              : 
     238              :   /* hash_table support.  */
     239              :   static inline hashval_t hash (const simd_array_to_simduid *);
     240              :   static inline int equal (const simd_array_to_simduid *,
     241              :                            const simd_array_to_simduid *);
     242              : };
     243              : 
     244              : inline hashval_t
     245        23644 : simd_array_to_simduid::hash (const simd_array_to_simduid *p)
     246              : {
                          /* Hash on the DECL_UID of the array declaration.  */
     247        23644 :   return DECL_UID (p->decl);
     248              : }
     249              : 
     250              : inline int
     251        16566 : simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
     252              :                               const simd_array_to_simduid *p2)
     253              : {
                          /* Entries match when they name the same declaration (pointer
                             comparison); the simduid field is ignored.  */
     254        16566 :   return p1->decl == p2->decl;
     255              : }
     256              : 
     257              : /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
     258              :    into their corresponding constants and remove
     259              :    IFN_GOMP_SIMD_ORDERED_{START,END}.

                           Every statement of FUN is visited.  HTAB may be null.  The
                           vectorization factor used for a simduid is the one recorded in
                           HTAB, or 1 when HTAB is null or has no entry for it.
                           IFN_GOMP_SIMD_VF folds to that factor, IFN_GOMP_SIMD_LANE to 0
                           and IFN_GOMP_SIMD_LAST_LANE to its second argument; uses of the
                           lhs of the call (if any) are replaced and the call is removed.
                           An IFN_GOMP_SIMD_ORDERED_{START,END} call whose first argument
                           is integer one is replaced by a call to
                           BUILT_IN_GOMP_ORDERED_{START,END}; any other such call is
                           removed.  */
     260              : 
     261              : static void
     262         7809 : adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
     263              : {
     264         7809 :   basic_block bb;
     265              : 
     266       118569 :   FOR_EACH_BB_FN (bb, fun)
     267              :     {
     268       110760 :       gimple_stmt_iterator i;
     269              : 
                              /* No increment in the loop header: every path below either
                                 advances I explicitly or removes/replaces the statement
                                 at I.  */
     270       738824 :       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
     271              :         {
     272       517304 :           poly_uint64 vf = 1;
     273       517304 :           enum internal_fn ifn;
     274       517304 :           gimple *stmt = gsi_stmt (i);
     275       517304 :           tree t;
                                  /* Only internal-function calls are of interest.  */
     276       517304 :           if (!is_gimple_call (stmt)
     277       517304 :               || !gimple_call_internal_p (stmt))
     278              :             {
     279       509741 :               gsi_next (&i);
     280       510657 :               continue;
     281              :             }
     282         7563 :           ifn = gimple_call_internal_fn (stmt);
     283         7563 :           switch (ifn)
     284              :             {
                                    /* These three fall out of the switch and are folded
                                       further down.  */
     285         6647 :             case IFN_GOMP_SIMD_LANE:
     286         6647 :             case IFN_GOMP_SIMD_VF:
     287         6647 :             case IFN_GOMP_SIMD_LAST_LANE:
     288         6647 :               break;
     289          158 :             case IFN_GOMP_SIMD_ORDERED_START:
     290          158 :             case IFN_GOMP_SIMD_ORDERED_END:
     291          158 :               if (integer_onep (gimple_call_arg (stmt, 0)))
     292              :                 {
                                          /* Replace by the matching ordered builtin,
                                             called with no arguments, and move the
                                             virtual operands over to it.  */
     293            5 :                   enum built_in_function bcode
     294              :                     = (ifn == IFN_GOMP_SIMD_ORDERED_START
     295           10 :                        ? BUILT_IN_GOMP_ORDERED_START
     296              :                        : BUILT_IN_GOMP_ORDERED_END);
     297           10 :                   gimple *g
     298           10 :                     = gimple_build_call (builtin_decl_explicit (bcode), 0);
     299           10 :                   gimple_move_vops (g, stmt);
     300           10 :                   gsi_replace (&i, g, true);
     301           10 :                   continue;
     302           10 :                 }
                                      /* Otherwise drop the call altogether.  */
     303          148 :               gsi_remove (&i, true);
     304          148 :               unlink_stmt_vdef (stmt);
     305          148 :               continue;
     306          758 :             default:
     307          758 :               gsi_next (&i);
     308          758 :               continue;
     309          906 :             }
                                  /* The first argument is the simduid; it must be an
                                     SSA_NAME and the DECL_UID of its variable is the
                                     lookup key.  */
     310         6647 :           tree arg = gimple_call_arg (stmt, 0);
     311         6647 :           gcc_assert (arg != NULL_TREE);
     312         6647 :           gcc_assert (TREE_CODE (arg) == SSA_NAME);
     313         6647 :           simduid_to_vf *p = NULL, data;
     314         6647 :           data.simduid = DECL_UID (SSA_NAME_VAR (arg));
     315              :           /* Need to nullify loop safelen field since its value is not
     316              :              valid after transformation.  */
     317         6647 :           if (bb->loop_father && bb->loop_father->safelen > 0)
     318         2169 :             bb->loop_father->safelen = 0;
                                  /* VF stays at 1 when there is no table or no entry.  */
     319         6647 :           if (htab)
     320              :             {
     321         4742 :               p = htab->find (&data);
     322         4742 :               if (p)
     323         4701 :                 vf = p->vf;
     324              :             }
                                  /* Pick the replacement value T for the result.  */
     325         6647 :           switch (ifn)
     326              :             {
     327          969 :             case IFN_GOMP_SIMD_VF:
     328          969 :               t = build_int_cst (unsigned_type_node, vf);
     329          969 :               break;
     330         3484 :             case IFN_GOMP_SIMD_LANE:
     331         3484 :               t = build_int_cst (unsigned_type_node, 0);
     332         3484 :               break;
     333         2194 :             case IFN_GOMP_SIMD_LAST_LANE:
     334         2194 :               t = gimple_call_arg (stmt, 1);
     335         2194 :               break;
     336              :             default:
     337              :               gcc_unreachable ();
     338              :             }
                                  /* The call may have no lhs, in which case there is
                                     nothing to replace before removing it.  */
     339         6647 :           tree lhs = gimple_call_lhs (stmt);
     340         6647 :           if (lhs)
     341         6590 :             replace_uses_by (lhs, t);
     342         6647 :           release_defs (stmt);
     343         6647 :           gsi_remove (&i, true);
     344              :         }
     345              :     }
     346         7809 : }
     347              : 
     348              : /* Helper structure for note_simd_array_uses.  It is passed to the
                           callback through the info field of walk_stmt_info.  */
     349              : 
     350              : struct note_simd_array_uses_struct
     351              : {
                          /* Address of the table pointer.  The callback allocates the
                             table on first use while the pointer is still null.  */
     352              :   hash_table<simd_array_to_simduid> **htab;
                          /* Simduid of the call whose uses are currently being walked.  */
     353              :   unsigned int simduid;
     354              : };
     355              : 
     356              : /* Callback for note_simd_array_uses, called through walk_gimple_op.

                           DATA is the walk_stmt_info whose info field points to a
                           note_simd_array_uses_struct.  When *TP is a variable of the
                           current function carrying the omp simd array attribute, record
                           it in the table together with the simduid being processed.  An
                           array already recorded with a different simduid gets its
                           simduid set to -1U.  Always returns NULL_TREE.  */
     357              : 
     358              : static tree
     359        65131 : note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
     360              : {
     361        65131 :   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
     362        65131 :   struct note_simd_array_uses_struct *ns
     363              :     = (struct note_simd_array_uses_struct *) wi->info;
     364              : 
                          /* Do not descend into types.  */
     365        65131 :   if (TYPE_P (*tp))
     366            0 :     *walk_subtrees = 0;
     367        65131 :   else if (VAR_P (*tp)
     368        12360 :            && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
     369        77491 :            && DECL_CONTEXT (*tp) == current_function_decl)
     370              :     {
                              /* Note: this local shadows the DATA parameter.  */
     371        12360 :       simd_array_to_simduid data;
                              /* Create the table lazily, on the first simd array seen.  */
     372        12360 :       if (!*ns->htab)
     373         2208 :         *ns->htab = new hash_table<simd_array_to_simduid> (15);
     374        12360 :       data.decl = *tp;
     375        12360 :       data.simduid = ns->simduid;
     376        12360 :       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
     377        12360 :       if (*slot == NULL)
     378              :         {
                                  /* First sighting: store a heap copy of the entry.  */
     379         5501 :           simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
     380         5501 :           *p = data;
     381         5501 :           *slot = p;
     382              :         }
                              /* Seen before with another simduid: mark with -1U.  */
     383         6859 :       else if ((*slot)->simduid != ns->simduid)
     384            0 :         (*slot)->simduid = -1U;
     385        12360 :       *walk_subtrees = 0;
     386              :     }
     387        65131 :   return NULL_TREE;
     388              : }
     389              : 
     390              : /* Find "omp simd array" temporaries and map them to corresponding
     391              :    simduid.

                           For every IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and
                           IFN_GOMP_SIMD_LAST_LANE call in FUN that has a lhs, the
                           operands of each non-debug statement using that lhs are walked
                           with note_simd_array_uses_cb.  *HTAB is only allocated by the
                           callback, so it is left as the caller set it when no simd
                           array is found.  */
     392              : 
     393              : static void
     394         7809 : note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
     395              : {
     396         7809 :   basic_block bb;
     397         7809 :   gimple_stmt_iterator gsi;
     398         7809 :   struct walk_stmt_info wi;
     399         7809 :   struct note_simd_array_uses_struct ns;
     400              : 
                          /* Zero the walk info and hand NS to the callback through it.  */
     401         7809 :   memset (&wi, 0, sizeof (wi));
     402         7809 :   wi.info = &ns;
     403         7809 :   ns.htab = htab;
     404              : 
     405       102453 :   FOR_EACH_BB_FN (bb, fun)
     406       583018 :     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     407              :       {
     408       393730 :         gimple *stmt = gsi_stmt (gsi);
     409       393730 :         if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
     410       387146 :           continue;
     411         7509 :         switch (gimple_call_internal_fn (stmt))
     412              :           {
     413         6620 :           case IFN_GOMP_SIMD_LANE:
     414         6620 :           case IFN_GOMP_SIMD_VF:
     415         6620 :           case IFN_GOMP_SIMD_LAST_LANE:
     416         6620 :             break;
     417          889 :           default:
     418          889 :             continue;
     419              :           }
                                /* Without a lhs the call has no uses to inspect.  */
     420         6620 :         tree lhs = gimple_call_lhs (stmt);
     421         6620 :         if (lhs == NULL_TREE)
     422           36 :           continue;
     423         6584 :         imm_use_iterator use_iter;
     424         6584 :         gimple *use_stmt;
                                /* The simduid is the DECL_UID of the variable behind the
                                   first call argument.  */
     425         6584 :         ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
     426        31198 :         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
     427        18030 :           if (!is_gimple_debug (use_stmt))
     428        24522 :             walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
     429              :       }
     430         7809 : }
     431              : 
     432              : /* Shrink arrays with "omp simd array" attribute to the corresponding
     433              :    vectorization factor.

                           Each entry of SIMD_ARRAY_TO_SIMDUID_HTAB whose simduid is not
                           -1U gets a new array type with as many elements as the factor
                           recorded for its simduid in SIMDUID_TO_VF_HTAB, or 1 element
                           when that table is null or has no entry.
                           SIMD_ARRAY_TO_SIMDUID_HTAB must not be null and is deleted
                           before returning.  */
     434              : 
     435              : static void
     436         2208 : shrink_simd_arrays
     437              :   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
     438              :    hash_table<simduid_to_vf> *simduid_to_vf_htab)
     439              : {
     440         7709 :   for (hash_table<simd_array_to_simduid>::iterator iter
     441         2208 :          = simd_array_to_simduid_htab->begin ();
     442        13210 :        iter != simd_array_to_simduid_htab->end (); ++iter)
     443         5501 :     if ((*iter)->simduid != -1U)
     444              :       {
     445         5501 :         tree decl = (*iter)->decl;
     446         5501 :         poly_uint64 vf = 1;
     447         5501 :         if (simduid_to_vf_htab)
     448              :           {
     449         4574 :             simduid_to_vf *p = NULL, data;
     450         4574 :             data.simduid = (*iter)->simduid;
     451         4574 :             p = simduid_to_vf_htab->find (&data);
     452         4574 :             if (p)
     453         4540 :               vf = p->vf;
     454              :           }
                                /* Same element type, VF elements; then lay the
                                   declaration out again for its new type.  */
     455         5501 :         tree atype
     456         5501 :           = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
     457         5501 :         TREE_TYPE (decl) = atype;
     458         5501 :         relayout_decl (decl);
     459              :       }
     460              : 
                          /* This function takes ownership of the table.  */
     461         2208 :   delete simd_array_to_simduid_htab;
     462         2208 : }
     463              : 
     464              : /* Initialize the vec_info with kind KIND_IN and shared data SHARED_.
     465              :    The remaining members start out false, null or zero.  */
     466              : 
     467      2759047 : vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
     468      2759047 :   : kind (kind_in),
     469      2759047 :     shared (shared_),
     470      2759047 :     stmt_vec_info_ro (false),
     471      2759047 :     bbs (NULL),
     472      2759047 :     nbbs (0),
     473      2759047 :     inv_pattern_def_seq (NULL)
     474              : {
                          /* NOTE(review): 50 is presumably the initial capacity of the
                             vector; confirm against the vec API.  */
     475      2759047 :   stmt_vec_infos.create (50);
     476      2759047 : }
     477              : 
     478      2759047 : vec_info::~vec_info ()
     479              : {
                          /* Free every SLP instance first, then the per-statement
                             infos.  */
     480      4969430 :   for (slp_instance &instance : slp_instances)
     481      1120361 :     vect_free_slp_instance (instance);
     482              : 
     483      2759047 :   free_stmt_vec_infos ();
     484      2759047 : }
     485              : 
     486      2328073 : vec_info_shared::vec_info_shared ()
     487      2328073 :   : datarefs (vNULL),
     488      2328073 :     datarefs_copy (vNULL),
     489      2328073 :     ddrs (vNULL)
     490              : {
                          /* Nothing else to do: all three vectors start out as vNULL.  */
     491      2328073 : }
     492              : 
     493      2328073 : vec_info_shared::~vec_info_shared ()
     494              : {
                          /* Free the data references and dependence relations, and
                             release the checking copy made by save_datarefs.  */
     495      2328073 :   free_data_refs (datarefs);
     496      2328073 :   free_dependence_relations (ddrs);
     497      2328073 :   datarefs_copy.release ();
     498      2328073 : }
     499              : 
     500              : void
     501      2099616 : vec_info_shared::save_datarefs ()
     502              : {
                          /* With flag_checking set, copy every data reference by value
                             into datarefs_copy so that check_datarefs can compare against
                             it later.  Does nothing otherwise.  */
     503      2099616 :   if (!flag_checking)
     504              :     return;
                          /* Space is reserved up front, so quick_push can be used.  */
     505      3150377 :   datarefs_copy.reserve_exact (datarefs.length ());
     506     12712913 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     507     10613314 :     datarefs_copy.quick_push (*datarefs[i]);
     508              : }
     509              : 
     510              : void
     511       886419 : vec_info_shared::check_datarefs ()
     512              : {
                          /* With flag_checking set, verify that the data references still
                             match the copies in datarefs_copy.  Does nothing otherwise.  */
     513       886419 :   if (!flag_checking)
     514              :     return;
     515      2656111 :   gcc_assert (datarefs.length () == datarefs_copy.length ());
                          /* Only the bytes that precede the alt_indices member are
                             compared; any difference is treated as unreachable.  */
     516     12235355 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     517     11348936 :     if (memcmp (&datarefs_copy[i], datarefs[i],
     518              :                 offsetof (data_reference, alt_indices)) != 0)
     519            0 :       gcc_unreachable ();
     520              : }
     521              : 
     522              : /* Record that STMT belongs to the vectorizable region.  Create and return
     523              :    an associated stmt_vec_info.  */
     524              : 
     525              : stmt_vec_info
     526     61746381 : vec_info::add_stmt (gimple *stmt)
     527              : {
                          /* Create the info, then attach it to STMT.  */
     528     61746381 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     529     61746381 :   set_vinfo_for_stmt (stmt, res);
     530     61746381 :   return res;
     531              : }
     532              : 
     533              : /* Record that STMT belongs to the vectorizable region.  Create a new
     534              :    stmt_vec_info flagged as a pattern statement, record STMT_INFO as its
     535              :    related statement and return the new stmt_vec_info.  */
     536              : 
     537              : stmt_vec_info
     538         1107 : vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
     539              : {
     540         1107 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     541         1107 :   res->pattern_stmt_p = true;
                          /* Unlike add_stmt, an explicit false is passed as the third
                             argument here.  */
     542         1107 :   set_vinfo_for_stmt (stmt, res, false);
     543         1107 :   STMT_VINFO_RELATED_STMT (res) = stmt_info;
     544         1107 :   return res;
     545              : }
     546              : 
     547              : /* If STMT was previously associated with a stmt_vec_info and STMT now resides
     548              :    at a different address than before (e.g., because STMT is a phi node that has
     549              :    been resized), update the stored address to match the new one.  It is not
     550              :    possible to use lookup_stmt () to perform this task, because that function
     551              :    returns NULL if the stored stmt pointer does not match the one being looked
     552              :    up.  Return the updated stmt_vec_info, or null if STMT has no usable
                           entry.  */
     553              : 
     554              : stmt_vec_info
     555        10865 : vec_info::resync_stmt_addr (gimple *stmt)
     556              : {
                          /* The uid of STMT, minus one, indexes stmt_vec_infos; a uid of
                             0 or one past the end of the vector has no entry.  */
     557        10865 :   unsigned int uid = gimple_uid (stmt);
     558        10865 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     559              :     {
     560        10865 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
                              /* Only an entry that already holds a statement is
                                 repointed.  */
     561        10865 :       if (res && res->stmt)
     562              :         {
     563        10865 :           res->stmt = stmt;
     564        10865 :           return res;
     565              :         }
     566              :     }
     567              :   return nullptr;
     568              : }
     569              : 
     570              : /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
     571              :    return null.  It is safe to call this function on any statement, even if
     572              :    it might not be part of the vectorizable region.  */
     573              : 
     574              : stmt_vec_info
     575    479889027 : vec_info::lookup_stmt (gimple *stmt)
     576              : {
                          /* The uid of STMT, minus one, indexes stmt_vec_infos; a uid of
                             0 or one past the end of the vector has no entry.  */
     577    479889027 :   unsigned int uid = gimple_uid (stmt);
     578    479889027 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     579              :     {
     580    302633279 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
                              /* The entry only counts if it was recorded for this very
                                 statement.  */
     581    302633279 :       if (res && res->stmt == stmt)
     582    302275197 :         return res;
     583              :     }
     584              :   return NULL;
     585              : }
     586              : 
     587              : /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
     588              :    return that stmt_vec_info, otherwise return null.  It is safe to call
     589              :    this on arbitrary operands.  */
     590              : 
     591              : stmt_vec_info
     592     56559070 : vec_info::lookup_def (tree name)
     593              : {
                          /* Default definitions are excluded; they yield null.  */
     594     56559070 :   if (TREE_CODE (name) == SSA_NAME
     595     56559070 :       && !SSA_NAME_IS_DEFAULT_DEF (name))
     596     51306109 :     return lookup_stmt (SSA_NAME_DEF_STMT (name));
     597              :   return NULL;
     598              : }
     599              : 
     600              : /* See whether there is a single non-debug statement that uses LHS and
     601              :    whether that statement has an associated stmt_vec_info.  Return the
     602              :    stmt_vec_info if so, otherwise return null.  */
     603              : 
     604              : stmt_vec_info
     605         1908 : vec_info::lookup_single_use (tree lhs)
     606              : {
                          /* DUMMY only receives the use operand; it is not read.  */
     607         1908 :   use_operand_p dummy;
     608         1908 :   gimple *use_stmt;
     609         1908 :   if (single_imm_use (lhs, &dummy, &use_stmt))
     610         1759 :     return lookup_stmt (use_stmt);
     611              :   return NULL;
     612              : }
     613              : 
     614              : /* Return vectorization information about DR.
                      :
                      :    DR_STMT (DR) must have a stmt_vec_info in this vec_info: the result
                      :    of lookup_stmt is dereferenced without a null check.  The returned
                      :    dr_vec_info is obtained from STMT_INFO->dr_aux.stmt rather than
                      :    from STMT_INFO directly, presumably so that a reference re-homed
                      :    by move_dr is followed to its current statement.  */
     615              : 
     616              : dr_vec_info *
     617     49911251 : vec_info::lookup_dr (data_reference *dr)
     618              : {
     619     49911251 :   stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
     620              :   /* DR_STMT should never refer to a stmt in a pattern replacement.  */
     621     49911251 :   gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
     622     49911251 :   return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
     623              : }
     624              : 
     625              : /* Record that NEW_STMT_INFO now implements the same data reference
     626              :    as OLD_STMT_INFO.
                      :
                      :    OLD_STMT_INFO must not be a pattern statement.  The stmt field of
                      :    OLD_STMT_INFO's STMT_VINFO_DR_INFO is first set to NEW_STMT_INFO and
                      :    OLD_STMT_INFO->dr_aux is then copied wholesale into
                      :    NEW_STMT_INFO->dr_aux.  The DR_WRT_VEC_LOOP, GATHER_SCATTER_P,
                      :    STRIDED_P and SIMD_LANE_ACCESS_P fields are copied across as well.
                      :    OLD_STMT_INFO itself is neither cleared nor freed here.  */
     627              : 
     628              : void
     629         6410 : vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
     630              : {
     631         6410 :   gcc_assert (!is_pattern_stmt_p (old_stmt_info));
     632         6410 :   STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
     633         6410 :   new_stmt_info->dr_aux = old_stmt_info->dr_aux;
     634         6410 :   STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
     635         6410 :     = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
     636         6410 :   STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
     637         6410 :     = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
     638         6410 :   STMT_VINFO_STRIDED_P (new_stmt_info)
     639         6410 :     = STMT_VINFO_STRIDED_P (old_stmt_info);
     640         6410 :   STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
     641         6410 :     = STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
     642         6410 : }
     643              : 
     644              : /* Permanently remove the statement described by STMT_INFO from the
     645              :    function.
                      :
                      :    STMT_INFO must not describe a pattern statement.  The statement's
                      :    slot in stmt_vec_infos is cleared first, then the statement is
                      :    unlinked from the virtual operand chain, removed from its
                      :    sequence and has its definitions released.  Finally STMT_INFO is
                      :    freed, so it must not be used after this call.  */
     646              : 
     647              : void
     648      1495614 : vec_info::remove_stmt (stmt_vec_info stmt_info)
     649              : {
     650      1495614 :   gcc_assert (!stmt_info->pattern_stmt_p);
     651      1495614 :   set_vinfo_for_stmt (stmt_info->stmt, NULL);
     652      1495614 :   unlink_stmt_vdef (stmt_info->stmt);
     653      1495614 :   gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
     654      1495614 :   gsi_remove (&si, true);
     655      1495614 :   release_defs (stmt_info->stmt);
     656      1495614 :   free_stmt_vec_info (stmt_info);
     657      1495614 : }
     658              : 
     659              : /* Replace the statement at GSI by NEW_STMT, both the vectorization
     660              :    information and the function itself.  STMT_INFO describes the statement
     661              :    at GSI.
                      :
                      :    STMT_INFO must not describe a pattern statement and its statement
                      :    must be the one GSI currently points at.  NEW_STMT inherits the old
                      :    statement's UID, so the existing stmt_vec_infos slot keeps mapping
                      :    to STMT_INFO, which is updated in place to refer to NEW_STMT.  */
     662              : 
     663              : void
     664         5193 : vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
     665              :                         gimple *new_stmt)
     666              : {
     667         5193 :   gimple *old_stmt = stmt_info->stmt;
     668         5193 :   gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
     669         5193 :   gimple_set_uid (new_stmt, gimple_uid (old_stmt));
     670         5193 :   stmt_info->stmt = new_stmt;
     671         5193 :   gsi_replace (gsi, new_stmt, true);
     672         5193 : }
     673              : 
     674              : /* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
     675              :    not NULL it specifies whether to use the sub-region entry
     676              :    determined by it, currently used for loop vectorization to insert
     677              :    on the inner loop entry vs. the outer loop entry.
                      :
                      :    For a loop_vec_info the sequence is inserted on the preheader edge
                      :    of the chosen loop, and that insertion is asserted not to create a
                      :    new basic block.  For any other vec_info CONTEXT is ignored and the
                      :    sequence is inserted after the labels of the first block in BBS.  */
     678              : 
     679              : void
     680       101860 : vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
     681              : {
     682       101860 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
     683              :     {
     684        19163 :       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     685        19163 :       basic_block new_bb;
     686        19163 :       edge pe;
     687              : 
     688        19163 :       if (context && nested_in_vect_loop_p (loop, context))
     689              :         loop = loop->inner;
     690              : 
     691        19163 :       pe = loop_preheader_edge (loop);
     692        19163 :       new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     693        19163 :       gcc_assert (!new_bb);
     694              :     }
     695              :   else
     696              :     {
     697        82697 :       gimple_stmt_iterator gsi_region_begin
     698        82697 :         = gsi_after_labels (bbs[0]);
     699        82697 :       gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
     700              :     }
     701       101860 : }
     702              : 
     703              : /* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.
                      :
                      :    NEW_STMT is first placed into a fresh, otherwise empty sequence
                      :    (without operand update), and that sequence is then handed to
                      :    insert_seq_on_entry together with CONTEXT.  */
     704              : 
     705              : void
     706         3299 : vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
     707              : {
     708         3299 :   gimple_seq seq = NULL;
     709         3299 :   gimple_stmt_iterator gsi = gsi_start (seq);
     710         3299 :   gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
     711         3299 :   insert_seq_on_entry (context, seq);
     712         3299 : }
     713              : 
     714              : /* Create and initialize a new stmt_vec_info struct for STMT.
                      :
                      :    The struct is allocated with XCNEW (assumed to zero the storage --
                      :    confirm against the macro) and only the fields set below get
                      :    explicit defaults.  The definition type is vect_unknown_def_type
                      :    for a PHI in a loop header block when this is a loop_vec_info, and
                      :    vect_internal_def in every other case.
                      :
                      :    The new struct is not entered into stmt_vec_infos here; that is
                      :    the job of set_vinfo_for_stmt.  */
     715              : 
     716              : stmt_vec_info
     717     61747488 : vec_info::new_stmt_vec_info (gimple *stmt)
     718              : {
     719     61747488 :   stmt_vec_info res = XCNEW (class _stmt_vec_info);
     720     61747488 :   res->stmt = stmt;
     721              : 
     722     61747488 :   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
     723     61747488 :   STMT_VINFO_VECTORIZABLE (res) = true;
     724     61747488 :   STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
     725     61747488 :   STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
     726     61747488 :   STMT_VINFO_REDUC_IDX (res) = -1;
     727     61747488 :   STMT_VINFO_REDUC_DEF (res) = NULL;
     728     61747488 :   STMT_VINFO_SLP_VECT_ONLY (res) = false;
     729              : 
     730     61747488 :   if (is_a <loop_vec_info> (this)
     731      8597686 :       && gimple_code (stmt) == GIMPLE_PHI
     732     63271136 :       && is_loop_header_bb_p (gimple_bb (stmt)))
     733      1512904 :     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
     734              :   else
     735     60234584 :     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
     736              : 
     737     61747488 :   STMT_SLP_TYPE (res) = not_vect;
     738              : 
     739              :   /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
     740     61747488 :   res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
     741              : 
     742     61747488 :   return res;
     743              : }
     744              : 
     745              : /* Associate STMT with INFO.
                      :
                      :    Two cases are distinguished by STMT's current UID:
                      :
                      :    - UID zero: STMT has no slot yet.  INFO must be non-null (checked
                      :      only in checking builds); it is appended to stmt_vec_infos and
                      :      STMT's UID becomes the new slot index plus one.  Unless CHECK_RO
                      :      is false, this asserts that stmt_vec_info_ro is not set.
                      :
                      :    - UID nonzero: STMT already has a slot.  The only supported update
                      :      is clearing it, so INFO must be null (checked only in checking
                      :      builds).  The slot is overwritten; STMT keeps its UID.  */
     746              : 
     747              : void
     748     63243102 : vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
     749              : {
     750     63243102 :   unsigned int uid = gimple_uid (stmt);
     751     63243102 :   if (uid == 0)
     752              :     {
     753     61747488 :       gcc_assert (!check_ro || !stmt_vec_info_ro);
     754     61747488 :       gcc_checking_assert (info);
     755     61747488 :       uid = stmt_vec_infos.length () + 1;
     756     61747488 :       gimple_set_uid (stmt, uid);
     757     61747488 :       stmt_vec_infos.safe_push (info);
     758              :     }
     759              :   else
     760              :     {
     761      1495614 :       gcc_checking_assert (info == NULL);
     762      1495614 :       stmt_vec_infos[uid - 1] = info;
     763              :     }
     764     63243102 : }
     765              : 
     766              : /* Free the contents of stmt_vec_infos.
                      :
                      :    Every non-null entry is passed to free_stmt_vec_info; null entries
                      :    are skipped.  The vector itself is released afterwards.  */
     767              : 
     768              : void
     769      2759047 : vec_info::free_stmt_vec_infos (void)
     770              : {
     771     70024629 :   for (stmt_vec_info &info : stmt_vec_infos)
     772     61747488 :     if (info != NULL)
     773     60251874 :       free_stmt_vec_info (info);
     774      2759047 :   stmt_vec_infos.release ();
     775      2759047 : }
     776              : 
     777              : /* Free STMT_INFO.
                      :
                      :    For a pattern statement the underlying gimple statement is also
                      :    cleaned up: its basic block is reset to null and, if its LHS is an
                      :    SSA_NAME, that name is released.  Statements that are not pattern
                      :    statements are left untouched; only the stmt_vec_info is freed.  */
     778              : 
     779              : void
     780     61747488 : vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
     781              : {
     782     61747488 :   if (stmt_info->pattern_stmt_p)
     783              :     {
     784      2428948 :       gimple_set_bb (stmt_info->stmt, NULL);
     785      2428948 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
     786      2428948 :       if (lhs && TREE_CODE (lhs) == SSA_NAME)
     787      2060688 :         release_ssa_name (lhs);
     788              :     }
     789              : 
     790     61747488 :   free (stmt_info);
     791     61747488 : }
     792              : 
     793              : /* Returns true if S1 dominates S2.
                      :
                      :    A statement is considered to dominate itself.  Statements in
                      :    different blocks are compared through the dominator tree.  Within
                      :    one block a PHI dominates a non-PHI, and two non-PHIs are ordered
                      :    by UID after skipping over neighbouring statements whose UID is
                      :    zero.  False is returned whenever the order cannot be established
                      :    this way.  */
     794              : 
     795              : bool
     796       553096 : vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
     797              : {
     798       553096 :   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
     799              : 
     800              :   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     801              :      SSA_NAME.  Assume it lives at the beginning of function and
     802              :      thus dominates everything.  */
     803       553096 :   if (!bb1 || s1 == s2)
     804              :     return true;
     805              : 
     806              :   /* If bb2 is NULL, S2 has no block while S1 does; S1 is then not
                      :      considered to dominate S2.  */
     807       551062 :   if (!bb2)
     808              :     return false;
     809              : 
     810       551062 :   if (bb1 != bb2)
     811       189469 :     return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
     812              : 
     813              :   /* PHIs in the same basic block are assumed to be
     814              :      executed all in parallel, if only one stmt is a PHI,
     815              :      it dominates the other stmt in the same basic block.  */
     816       361593 :   if (gimple_code (s1) == GIMPLE_PHI)
     817              :     return true;
     818              : 
     819       325136 :   if (gimple_code (s2) == GIMPLE_PHI)
     820              :     return false;
     821              : 
     822              :   /* Inserted vectorized stmts all have UID 0 while the original stmts
     823              :      in the IL have UID increasing within a BB.  Walk from both sides
     824              :      until we find the other stmt or a stmt with UID != 0.  */
     825       307487 :   gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
     826       777819 :   while (gimple_uid (gsi_stmt (gsi1)) == 0)  /* Forward from S1.  */
     827              :     {
     828       587773 :       gsi_next (&gsi1);
     829       587773 :       if (gsi_end_p (gsi1))
     830              :         return false;
     831       586059 :       if (gsi_stmt (gsi1) == s2)
     832              :         return true;
     833              :     }
     834       190046 :   if (gimple_uid (gsi_stmt (gsi1)) == -1u)  /* UID -1u cannot be ordered.  */
     835              :     return false;
     836              : 
     837       190046 :   gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
     838       797566 :   while (gimple_uid (gsi_stmt (gsi2)) == 0)  /* Backward from S2.  */
     839              :     {
     840       619683 :       gsi_prev (&gsi2);
     841       619683 :       if (gsi_end_p (gsi2))
     842              :         return false;
     843       607528 :       if (gsi_stmt (gsi2) == s1)
     844              :         return true;
     845              :     }
     846       177883 :   if (gimple_uid (gsi_stmt (gsi2)) == -1u)  /* UID -1u cannot be ordered.  */
     847              :     return false;
     848              : 
                      :   /* Both iterators now rest on stmts with a real UID; equal UIDs
                      :      count as S1 dominating S2.  */
     849       177883 :   if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
     850              :     return true;
     851              :   return false;
     852              : }
     853              : 
     854              : /* A helper function to free scev and LOOP niter information, as well as
     855              :    clear loop constraint LOOP_C_FINITE.
                      :
                      :    Note that the scev reset is not limited to LOOP: scev_reset_htab
                      :    takes no loop argument.  The upper-bound flags, the iteration
                      :    estimates and the constraint are cleared on LOOP only.  */
     856              : 
     857              : void
     858        44151 : vect_free_loop_info_assumptions (class loop *loop)
     859              : {
     860        44151 :   scev_reset_htab ();
     861              :   /* We need to explicitly reset upper bound information since they are
     862              :      used even after free_numbers_of_iterations_estimates.  */
     863        44151 :   loop->any_upper_bound = false;
     864        44151 :   loop->any_likely_upper_bound = false;
     865        44151 :   free_numbers_of_iterations_estimates (loop);
     866        44151 :   loop_constraint_clear (loop, LOOP_C_FINITE);
     867        44151 : }
     868              : 
     869              : /* If LOOP has been versioned during ifcvt, return the internal call
     870              :    guarding it.
                      :
                      :    The search starts at the source block of LOOP's preheader edge and
                      :    walks to the single predecessor for as long as the current block
                      :    has a single successor and a single predecessor and does not end
                      :    in a GIMPLE_COND.  If the block the walk stops at ends in a
                      :    GIMPLE_COND, the statement immediately before that condition is
                      :    returned when it is an IFN_LOOP_VECTORIZED call whose argument 0
                      :    or argument 1 equals LOOP->num.  In all other cases the result is
                      :    null.
                      :
                      :    If COND is non-null, *COND is set to the condition found by the
                      :    walk, even when the function then goes on to return null.  *COND
                      :    is left untouched when no condition is found.  */
     871              : 
     872              : gimple *
     873       518441 : vect_loop_vectorized_call (class loop *loop, gcond **cond)
     874              : {
     875       518441 :   basic_block bb = loop_preheader_edge (loop)->src;
     876       957894 :   gimple *g;
     877      1397347 :   do
     878              :     {
     879       957894 :       g = *gsi_last_bb (bb);
     880       608080 :       if ((g && gimple_code (g) == GIMPLE_COND)
     881      2055052 :           || !single_succ_p (bb))
     882              :         break;
     883       578717 :       if (!single_pred_p (bb))
     884              :         break;
     885       439453 :       bb = single_pred (bb);
     886              :     }
     887              :   while (1);
     888       518441 :   if (g && gimple_code (g) == GIMPLE_COND)
     889              :     {
     890       372959 :       if (cond)
     891            0 :         *cond = as_a <gcond *> (g);
     892       372959 :       gimple_stmt_iterator gsi = gsi_for_stmt (g);
     893       372959 :       gsi_prev (&gsi);  /* Step to the stmt just before the condition.  */
     894       372959 :       if (!gsi_end_p (gsi))
     895              :         {
     896       341537 :           g = gsi_stmt (gsi);
     897       341537 :           if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
     898       341537 :               && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
     899        30169 :                   || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
     900        60621 :             return g;
     901              :         }
     902              :     }
     903              :   return NULL;
     904              : }
     905              : 
     906              : /* If LOOP has been versioned during loop distribution, return the guarding
     907              :    internal call.
                      :
                      :    Null is returned immediately when LOOP->orig_loop_num is zero.  If
                      :    the loop it names can no longer be found in FUN, the field is reset
                      :    to zero as a side effect and null is returned.
                      :
                      :    Otherwise the search starts at the nearest common dominator of the
                      :    headers of LOOP and the original loop (or, when they are the same
                      :    loop, at the source block of LOOP's preheader edge) and climbs the
                      :    dominator tree.  It stops at the function entry block or on
                      :    leaving the loop that contains the starting block.  The result is
                      :    the first IFN_LOOP_DIST_ALIAS call that directly precedes a
                      :    block-ending condition and whose argument 0 equals
                      :    LOOP->orig_loop_num.  */
     908              : 
     909              : static gimple *
     910       481702 : vect_loop_dist_alias_call (class loop *loop, function *fun)
     911              : {
     912       481702 :   basic_block bb;
     913       481702 :   basic_block entry;
     914       481702 :   class loop *outer, *orig;
     915              : 
     916       481702 :   if (loop->orig_loop_num == 0)
     917              :     return NULL;
     918              : 
     919          168 :   orig = get_loop (fun, loop->orig_loop_num);
     920          168 :   if (orig == NULL)
     921              :     {
     922              :       /* The original loop is somehow destroyed.  Clear the information.  */
     923            0 :       loop->orig_loop_num = 0;
     924            0 :       return NULL;
     925              :     }
     926              : 
     927          168 :   if (loop != orig)
     928           95 :     bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
     929              :   else
     930           73 :     bb = loop_preheader_edge (loop)->src;
     931              : 
     932          168 :   outer = bb->loop_father;
     933          168 :   entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
     934              : 
     935              :   /* Look upward in dominance tree.  */
     936          751 :   for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
     937          583 :        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
     938              :     {
     939          699 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
     940          699 :       if (!safe_is_a <gcond *> (*gsi))
     941          583 :         continue;
     942              : 
     943          514 :       gsi_prev (&gsi);
     944          514 :       if (gsi_end_p (gsi))
     945            8 :         continue;
     946              : 
     947          506 :       gimple *g = gsi_stmt (gsi);
     948              :       /* The guarding internal function call must have the same distribution
     949              :          alias id.  */
     950          506 :       if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
     951          506 :           && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
     952       481702 :         return g;
     953              :     }
     954              :   return NULL;
     955              : }
     956              : 
     957              : /* Set the uids of all the statements in basic blocks inside loop
     958              :    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
     959              :    call guarding the loop which has been if converted.
                      :
                      :    The loop whose statements are touched is the scalar loop, i.e. the
                      :    loop in FUN numbered by argument 1 of LOOP_VECTORIZED_CALL.  It is
                      :    recorded in LOOP_VINFO together with its main exit, and every PHI
                      :    and statement in its body gets UID zero.
                      :
                      :    As a side effect, if the scalar loop has an inner loop that is
                      :    itself guarded by a LOOP_VECTORIZED call, the loop numbered by
                      :    argument 0 of that call is marked dont_vectorize and the call is
                      :    folded to false.  */
     960              : static void
     961         7807 : set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
     962              :                   function *fun)
     963              : {
     964         7807 :   tree arg = gimple_call_arg (loop_vectorized_call, 1);
     965         7807 :   basic_block *bbs;
     966         7807 :   unsigned int i;
     967         7807 :   class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
     968              : 
     969         7807 :   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
     970         7807 :   LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo)
     971         7807 :     = vec_init_loop_exit_info (scalar_loop);
     972         7807 :   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
     973              :                        == loop_vectorized_call);
     974              :   /* If we are going to vectorize outer loop, prevent vectorization
     975              :      of the inner loop in the scalar loop - either the scalar loop is
     976              :      thrown away, so it is a wasted work, or is used only for
     977              :      a few iterations.  */
     978         7807 :   if (scalar_loop->inner)
     979              :     {
     980          120 :       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
     981          120 :       if (g)
     982              :         {
     983          120 :           arg = gimple_call_arg (g, 0);
     984          120 :           get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
     985          120 :           fold_loop_internal_call (g, boolean_false_node);
     986              :         }
     987              :     }
     988         7807 :   bbs = get_loop_body (scalar_loop);  /* Released with free below.  */
     989        42491 :   for (i = 0; i < scalar_loop->num_nodes; i++)
     990              :     {
     991        34684 :       basic_block bb = bbs[i];
     992        34684 :       gimple_stmt_iterator gsi;
     993        66841 :       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     994              :         {
     995        32157 :           gimple *phi = gsi_stmt (gsi);
     996        32157 :           gimple_set_uid (phi, 0);
     997              :         }
     998       175231 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     999              :         {
    1000       105863 :           gimple *stmt = gsi_stmt (gsi);
    1001       105863 :           gimple_set_uid (stmt, 0);
    1002              :         }
    1003              :     }
    1004         7807 :   free (bbs);
    1005         7807 : }
    1006              : 
    1007              : /* Generate vectorized code for LOOP and its epilogues.
                      :
                      :    LOOP must already carry a loop_vec_info.  When LOOP_VECTORIZED_CALL
                      :    is non-null, the UIDs of the guarded scalar loop are reset first.
                      :    After the transform LOOP->force_vectorize is cleared.  If LOOP has
                      :    a simduid, its vectorization factor is recorded in
                      :    SIMDUID_TO_VF_HTAB, which is created on first use.
                      :
                      :    If vect_transform_loop hands back a further loop, this function
                      :    recurses on it with a null LOOP_VECTORIZED_CALL.  The return value
                      :    is the union of the TODO flags collected for LOOP and for every
                      :    loop transformed recursively.  */
    1008              : 
    1009              : static unsigned
    1010        61803 : vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1011              :                       loop_p loop, gimple *loop_vectorized_call,
    1012              :                       function *fun)
    1013              : {
    1014        61803 :   loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
    1015              : 
    1016        61803 :   if (loop_vectorized_call)
    1017         7807 :     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
    1018              : 
    1019        61803 :   unsigned HOST_WIDE_INT bytes;  /* Only set if the mode size is constant.  */
    1020        61803 :   if (dump_enabled_p ())
    1021              :     {
    1022        22030 :       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
    1023        11015 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1024              :                          "%sloop vectorized using %s%wu byte vectors and"
    1025              :                          " unroll factor %u\n",
    1026        11015 :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1027              :                          ? "epilogue " : "",
    1028        11015 :                          LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    1029              :                          ? "masked " : "", bytes,
    1030              :                          (unsigned int) LOOP_VINFO_VECT_FACTOR
    1031        11015 :                                                  (loop_vinfo).to_constant ());
    1032              :       else
    1033              :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1034              :                          "%sloop vectorized using variable length vectors\n",
    1035              :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1036              :                          ? "epilogue " : "");
    1037              :     }
    1038              : 
    1039        61803 :   loop_p new_loop = vect_transform_loop (loop_vinfo,
    1040              :                                          loop_vectorized_call);
    1041              :   /* Now that the loop has been vectorized, allow it to be unrolled
    1042              :      etc.  */
    1043        61803 :   loop->force_vectorize = false;
    1044              : 
    1045        61803 :   if (loop->simduid)
    1046              :     {
    1047         1895 :       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
    1048         1895 :       if (!simduid_to_vf_htab)
    1049         1535 :         simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
    1050         1895 :       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
    1051         1895 :       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
                      :       /* NOTE(review): the slot is assigned unconditionally, so a pointer
                      :          already stored there is overwritten without being freed here --
                      :          confirm whether two entries can share a simduid.  */
    1052         1895 :       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
    1053         1895 :           = simduid_to_vf_data;
    1054              :     }
    1055              : 
    1056              :   /* We should not have to update virtual SSA form here but some
    1057              :      transforms involve creating new virtual definitions which makes
    1058              :      updating difficult.
    1059              :      We delay the actual update to the end of the pass but avoid
    1060              :      confusing ourselves by forcing need_ssa_update_p () to false.  */
    1061        61803 :   unsigned todo = 0;
    1062        61803 :   if (need_ssa_update_p (cfun))
    1063              :     {
    1064          119 :       gcc_assert (loop_vinfo->any_known_not_updated_vssa);
    1065          119 :       fun->gimple_df->ssa_renaming_needed = false;
    1066          119 :       todo |= TODO_update_ssa_only_virtuals;
    1067              :     }
    1068        61803 :   gcc_assert (!need_ssa_update_p (cfun));
    1069              : 
    1070              :   /* Epilogue of vectorized loop must be vectorized too.  */
    1071        61803 :   if (new_loop)
    1072         6847 :     todo |= vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
    1073              : 
    1074        61803 :   return todo;
    1075              : }
    1076              : 
    1077              : /* Try to vectorize LOOP.
                      :
                      :    LOOP_VECTORIZED_CALL and LOOP_DIST_ALIAS_CALL are the internal calls
                      :    guarding LOOP, or null.  The analysis result is stored in LOOP->aux.
                      :    The return value is a set of TODO flags.
                      :
                      :    When the analysis fails, or its result is not vectorizable, the
                      :    function frees the LOOP_C_FINITE assumptions if any.  Then, if SLP
                      :    vectorization is enabled and LOOP is an if-converted innermost
                      :    loop, it tries to BB-vectorize the loop header instead.  That
                      :    attempt is skipped when the header contains a masked load, store
                      :    or call, or a direct internal function the target does not
                      :    support.  If it succeeds, LOOP_VECTORIZED_CALL is folded to true.
                      :    A guarded outer loop that fails also marks its inner loop
                      :    dont_vectorize.
                      :
                      :    When the analysis succeeds and the vect_loop debug counter allows
                      :    it, *NUM_VECTORIZED_LOOPS is incremented and LOOP is transformed
                      :    together with its epilogues.  LOOP_VECTORIZED_CALL is then folded
                      :    to true and LOOP_DIST_ALIAS_CALL to its own argument 1.  */
    1078              : 
    1079              : static unsigned
    1080       464638 : try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1081              :                       unsigned *num_vectorized_loops, loop_p loop,
    1082              :                       gimple *loop_vectorized_call,
    1083              :                       gimple *loop_dist_alias_call,
    1084              :                       function *fun)
    1085              : {
    1086       464638 :   unsigned ret = 0;
    1087       464638 :   vec_info_shared shared;
    1088       464638 :   auto_purge_vect_location sentinel;
    1089       464638 :   vect_location = find_loop_location (loop);
    1090              : 
    1091       464638 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    1092       464638 :       && dump_enabled_p ())
    1093        15028 :     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
    1094              :                  "\nAnalyzing loop at %s:%d\n",
    1095        15028 :                  LOCATION_FILE (vect_location.get_location_t ()),
    1096        30056 :                  LOCATION_LINE (vect_location.get_location_t ()));
    1097              : 
    1098              :   /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
    1099       464638 :   opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, loop_vectorized_call,
    1100              :                                                     &shared);
    1101       464638 :   loop->aux = loop_vinfo;
    1102              : 
    1103       464638 :   if (!loop_vinfo)
    1104       409678 :     if (dump_enabled_p ())
    1105         5782 :       if (opt_problem *problem = loop_vinfo.get_problem ())
    1106              :         {
    1107         5782 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1108              :                            "couldn't vectorize loop\n");
    1109         5782 :           problem->emit_and_clear ();
    1110              :         }
    1111              : 
    1112       464638 :   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    1113              :     {
    1114              :       /* Free existing information if loop is analyzed with some
    1115              :          assumptions.  */
    1116       409678 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1117         7990 :         vect_free_loop_info_assumptions (loop);
    1118              : 
    1119              :       /* If we applied if-conversion then try to vectorize the
    1120              :          BB of innermost loops.
    1121              :          ???  Ideally BB vectorization would learn to vectorize
    1122              :          control flow by applying if-conversion on-the-fly, the
    1123              :          following retains the if-converted loop body even when
    1124              :          only non-if-converted parts took part in BB vectorization.  */
    1125       409678 :       if (flag_tree_slp_vectorize != 0
    1126       408660 :           && loop_vectorized_call
    1127        20981 :           && ! loop->inner)
    1128              :         {
    1129        20180 :           basic_block bb = loop->header;
    1130        20180 :           bool require_loop_vectorize = false;
                      :           /* Scan the header.  Each stmt visited before a disqualifying
                      :              call is found gets UID -1 and its visited flag cleared; those
                      :              changes are not undone when the scan breaks out early.  */
    1131        40360 :           for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
    1132       735064 :                !gsi_end_p (gsi); gsi_next (&gsi))
    1133              :             {
    1134       715705 :               gimple *stmt = gsi_stmt (gsi);
    1135       715705 :               gcall *call = dyn_cast <gcall *> (stmt);
    1136         1243 :               if (call && gimple_call_internal_p (call))
    1137              :                 {
    1138         1146 :                   internal_fn ifn = gimple_call_internal_fn (call);
    1139         1146 :                   if (ifn == IFN_MASK_LOAD
    1140         1146 :                       || ifn == IFN_MASK_STORE
    1141          729 :                       || ifn == IFN_MASK_CALL
    1142              :                       /* Don't keep the if-converted parts when the ifn with
    1143              :                          specific type is not supported by the backend.  */
    1144         1856 :                       || (direct_internal_fn_p (ifn)
    1145          385 :                           && !direct_internal_fn_supported_p
    1146          385 :                           (call, OPTIMIZE_FOR_SPEED)))
    1147              :                     {
    1148              :                       require_loop_vectorize = true;
    1149              :                       break;
    1150              :                     }
    1151              :                 }
    1152       714884 :               gimple_set_uid (stmt, -1);
    1153       714884 :               gimple_set_visited (stmt, false);
    1154              :             }
    1155        20180 :           if (!require_loop_vectorize)
    1156              :             {
    1157        19359 :               tree arg = gimple_call_arg (loop_vectorized_call, 1);
    1158        19359 :               class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
    1159        19359 :               if (vect_slp_if_converted_bb (bb, scalar_loop))
    1160              :                 {
    1161           75 :                   fold_loop_internal_call (loop_vectorized_call,
    1162              :                                            boolean_true_node);
    1163           75 :                   loop_vectorized_call = NULL;  /* Already folded.  */
    1164           75 :                   ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    1165              :                 }
    1166              :             }
    1167              :         }
    1168              :       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
    1169              :          loop, don't vectorize its inner loop; we'll attempt to
    1170              :          vectorize LOOP_VECTORIZED guarded inner loop of the scalar
    1171              :          loop version.  */
    1172        21999 :       if (loop_vectorized_call && loop->inner)
    1173          803 :         loop->inner->dont_vectorize = true;
    1174       409678 :       return ret;
    1175              :     }
    1176              : 
    1177        54960 :   if (!dbg_cnt (vect_loop))
    1178              :     {
    1179              :       /* Free existing information if loop is analyzed with some
    1180              :          assumptions.  */
    1181            4 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1182            0 :         vect_free_loop_info_assumptions (loop);
    1183            4 :       return ret;
    1184              :     }
    1185              : 
    1186        54956 :   (*num_vectorized_loops)++;
    1187              :   /* Transform LOOP and its epilogues.  */
    1188        54956 :   ret |= vect_transform_loops (simduid_to_vf_htab, loop,
    1189              :                                loop_vectorized_call, fun);
    1190              : 
    1191        54956 :   if (loop_vectorized_call)
    1192              :     {
    1193         7807 :       fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
    1194         7807 :       ret |= TODO_cleanup_cfg;
    1195              :     }
    1196        54956 :   if (loop_dist_alias_call)
    1197              :     {
    1198            8 :       tree value = gimple_call_arg (loop_dist_alias_call, 1);
    1199            8 :       fold_loop_internal_call (loop_dist_alias_call, value);
    1200            8 :       ret |= TODO_cleanup_cfg;
    1201              :     }
    1202              : 
    1203              :   return ret;
    1204       464638 : }
    1205              : 
    1206              : /* Try to vectorize LOOP.  Returns 0 without attempting anything unless flag_tree_loop_vectorize is set and LOOP's nest is optimized for speed, or LOOP has force_vectorize set; otherwise returns the TODO flags produced by try_vectorize_loop_1.  */
    1207              : 
    1208              : static unsigned
    1209       495807 : try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1210              :                     unsigned *num_vectorized_loops, loop_p loop,
    1211              :                     function *fun)
    1212              : {
    1213       495807 :   if (!((flag_tree_loop_vectorize
    1214       491235 :          && optimize_loop_nest_for_speed_p (loop))
    1215        32972 :         || loop->force_vectorize))
    1216              :     return 0;
    1217              : 
    1218       464638 :   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
    1219              :                                vect_loop_vectorized_call (loop),
    1220       464638 :                                vect_loop_dist_alias_call (loop, fun), fun);
    1221              : }
    1222              : 
    1223              : 
    1224              : /* Loop autovectorization.  */
    1225              : 
    1226              : namespace {
    1227              : 
    1228              : const pass_data pass_data_vectorize =
    1229              : {
    1230              :   GIMPLE_PASS, /* type */
    1231              :   "vect", /* name */
    1232              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1233              :   TV_TREE_VECTORIZATION, /* tv_id */
    1234              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1235              :   0, /* properties_provided */
    1236              :   0, /* properties_destroyed */
    1237              :   0, /* todo_flags_start */
    1238              :   0, /* todo_flags_finish */
    1239              : };
    1240              : 
    1241              : class pass_vectorize : public gimple_opt_pass
    1242              : {
    1243              : public:
    1244       298828 :   pass_vectorize (gcc::context *ctxt)
    1245       597656 :     : gimple_opt_pass (pass_data_vectorize, ctxt)
    1246              :   {}
    1247              : 
    1248              :   /* opt_pass methods: the gate lets the pass run when flag_tree_loop_vectorize is set or FUN->has_force_vectorize_loops is set.  */
    1249       240924 :   bool gate (function *fun) final override
    1250              :     {
    1251       240924 :       return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    1252              :     }
    1253              : 
    1254              :   unsigned int execute (function *) final override;
    1255              : 
    1256              : }; // class pass_vectorize
    1257              : 
    1258              : /* Function pass_vectorize::execute (aka vectorize_loops).
    1259              : 
    1260              :    Entry point to loop vectorization phase.  Returns the accumulated TODO_* flags, or 0 when FUN has no loops.  */
    1261              : 
    1262              : unsigned
    1263       207522 : pass_vectorize::execute (function *fun)
    1264              : {
    1265       207522 :   unsigned int i;
    1266       207522 :   unsigned int num_vectorized_loops = 0;
    1267       207522 :   unsigned int vect_loops_num;
    1268       207522 :   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
    1269       207522 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1270       207522 :   bool any_ifcvt_loops = false;
    1271       207522 :   unsigned ret = 0;
    1272              : 
    1273       207522 :   vect_loops_num = number_of_loops (fun);
    1274              : 
    1275              :   /* Bail out if there are no loops.  */
    1276       207522 :   if (vect_loops_num <= 1)
    1277              :     return 0;
    1278              : 
    1279       207522 :   vect_slp_init ();
    1280              : 
    1281       207522 :   if (fun->has_simduid_loops)
    1282         5601 :     note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1283              : 
    1284              :   /*  ----------- Analyze loops. -----------  */
    1285       207522 :   enable_ranger (fun);
    1286              : 
    1287              :   /* If some loop was duplicated, it gets bigger number
    1288              :      than all previously defined loops.  This fact allows us to run
    1289              :      only over initial loops skipping newly generated ones.  */
    1290      1153140 :   for (auto loop : loops_list (fun, 0))
    1291       530574 :     if (loop->dont_vectorize)
    1292              :       {
    1293        35690 :         any_ifcvt_loops = true;
    1294              :         /* If-conversion sometimes versions both the outer loop
    1295              :            (for the case when outer loop vectorization might be
    1296              :            desirable) as well as the inner loop in the scalar version
    1297              :            of the loop.  So we have:
    1298              :             if (LOOP_VECTORIZED (1, 3))
    1299              :               {
    1300              :                 loop1
    1301              :                   loop2
    1302              :               }
    1303              :             else
    1304              :               loop3 (copy of loop1)
    1305              :                 if (LOOP_VECTORIZED (4, 5))
    1306              :                   loop4 (copy of loop2)
    1307              :                 else
    1308              :                   loop5 (copy of loop4)
    1309              :            If loops' iteration gives us loop3 first (which has
    1310              :            dont_vectorize set), make sure to process loop1 before loop4;
    1311              :            so that we can prevent vectorization of loop4 if loop1
    1312              :            is successfully vectorized.  */
    1313        35690 :         if (loop->inner)
    1314              :           {
    1315         2168 :             gimple *loop_vectorized_call
    1316         2168 :               = vect_loop_vectorized_call (loop);
    1317         2168 :             if (loop_vectorized_call
    1318         2168 :                 && vect_loop_vectorized_call (loop->inner))
    1319              :               {
    1320          923 :                 tree arg = gimple_call_arg (loop_vectorized_call, 0);
    1321          923 :                 class loop *vector_loop
    1322          923 :                   = get_loop (fun, tree_to_shwi (arg));
    1323          923 :                 if (vector_loop && vector_loop != loop)
    1324              :                   {
    1325              :                     /* Make sure we don't vectorize it twice.  */
    1326          923 :                     vector_loop->dont_vectorize = true;
    1327          923 :                     ret |= try_vectorize_loop (simduid_to_vf_htab,
    1328              :                                                &num_vectorized_loops,
    1329              :                                                vector_loop, fun);
    1330              :                   }
    1331              :               }
    1332              :           }
    1333              :       }
    1334              :     else
    1335       494884 :       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
    1336       207522 :                                  loop, fun);
    1337              : 
    1338       207522 :   vect_location = dump_user_location_t ();
    1339              :   /* NOTE(review): in the dump test below the num_vectorized_loops > 0 arm is subsumed by the first dump_enabled_p () test.  */
    1340       207522 :   statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
    1341       207522 :   if (dump_enabled_p ()
    1342       207522 :       || (num_vectorized_loops > 0 && dump_enabled_p ()))
    1343        11662 :     dump_printf_loc (MSG_NOTE, vect_location,
    1344              :                      "vectorized %u loops in function.\n",
    1345              :                      num_vectorized_loops);
    1346              : 
    1347              :   /*  ----------- Finalize. -----------  */
    1348       207522 :   disable_ranger (fun);
    1349              : 
    1350       207522 :   if (any_ifcvt_loops)
    1351       322930 :     for (i = 1; i < number_of_loops (fun); i++)
    1352              :       {
    1353       139705 :         class loop *loop = get_loop (fun, i);
    1354       139705 :         if (loop && loop->dont_vectorize)
    1355              :           {
    1356        38263 :             gimple *g = vect_loop_vectorized_call (loop);
    1357        38263 :             if (g)
    1358              :               {
    1359        21199 :                 fold_loop_internal_call (g, boolean_false_node);
    1360        21199 :                 loop->dont_vectorize = false;
    1361        21199 :                 ret |= TODO_cleanup_cfg;
    1362        21199 :                 g = NULL;
    1363              :               }
    1364              :             else
    1365        17064 :               g = vect_loop_dist_alias_call (loop, fun);
    1366              : 
    1367        38263 :             if (g)
    1368              :               {
    1369           30 :                 fold_loop_internal_call (g, boolean_false_node);
    1370           30 :                 loop->dont_vectorize = false;
    1371           30 :                 ret |= TODO_cleanup_cfg;
    1372              :               }
    1373              :           }
    1374              :       }
    1375              : 
    1376              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1377       207522 :   if (fun->has_simduid_loops)
    1378              :     {
    1379         5601 :       adjust_simduid_builtins (simduid_to_vf_htab, fun);
    1380              :       /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
    1381         5601 :       scev_reset ();
    1382              :     }
    1383              :   /* Shrink any "omp array simd" temporary arrays to the
    1384              :      actual vectorization factors.  */
    1385       207522 :   if (simd_array_to_simduid_htab)
    1386         2204 :     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
    1387       207522 :   delete simduid_to_vf_htab;
    1388       207522 :   fun->has_simduid_loops = false;
    1389              : 
    1390       207522 :   if (num_vectorized_loops > 0)
    1391              :     {
    1392              :       /* We are collecting some corner cases where we need to update
    1393              :          virtual SSA form via the TODO but delete the queued update-SSA
    1394              :          state.  Force renaming if we think that might be necessary.  */
    1395        37281 :       if (ret & TODO_update_ssa_only_virtuals)
    1396           89 :         mark_virtual_operands_for_renaming (cfun);
    1397              :       /* If we vectorized any loop only virtual SSA form needs to be updated.
    1398              :          ???  Also while we try hard to update loop-closed SSA form we fail
    1399              :          to properly do this in some corner-cases (see PR56286).  */
    1400        37281 :       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
    1401        37281 :       ret |= TODO_cleanup_cfg;
    1402              :     }
    1403              : 
    1404      1852228 :   for (i = 1; i < number_of_loops (fun); i++)
    1405              :     {
    1406       718592 :       loop_vec_info loop_vinfo;
    1407       718592 :       bool has_mask_store;
    1408              : 
    1409       718592 :       class loop *loop = get_loop (fun, i);
    1410       718592 :       if (!loop || !loop->aux)
    1411       656785 :         continue;
    1412        61807 :       loop_vinfo = (loop_vec_info) loop->aux;
    1413        61807 :       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
    1414        61807 :       delete loop_vinfo;
    1415        61807 :       if (has_mask_store
    1416        61807 :           && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
    1417          493 :         optimize_mask_stores (loop);
    1418              : 
    1419        61807 :       auto_bitmap exit_bbs;
    1420              :       /* Perform local CSE, this esp. helps because we emit code for
    1421              :          predicates that need to be shared for optimal predicate usage.
    1422              :          However reassoc will re-order them and prevent CSE from working
    1423              :          as it should.  CSE only the loop body, not the entry.  */
    1424        61807 :       auto_vec<edge> exits = get_loop_exit_edges (loop);
    1425       248755 :       for (edge exit : exits)
    1426        63334 :         bitmap_set_bit (exit_bbs, exit->dest->index);
    1427              : 
    1428        61807 :       edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
    1429        61807 :       do_rpo_vn (fun, entry, exit_bbs);
    1430              : 
    1431        61807 :       loop->aux = NULL;
    1432        61807 :     }
    1433              : 
    1434       207522 :   vect_slp_fini ();
    1435              : 
    1436       207522 :   return ret;
    1437              : }
    1438              : 
    1439              : } // anon namespace
    1440              : /* Create a new instance of the loop vectorization pass for CTXT.  */
    1441              : gimple_opt_pass *
    1442       298828 : make_pass_vectorize (gcc::context *ctxt)
    1443              : {
    1444       298828 :   return new pass_vectorize (ctxt);
    1445              : }
    1446              : 
    1447              : /* Entry point to the simduid cleanup pass.  */
    1448              : 
    1449              : namespace {
    1450              : 
    1451              : const pass_data pass_data_simduid_cleanup =
    1452              : {
    1453              :   GIMPLE_PASS, /* type */
    1454              :   "simduid", /* name */
    1455              :   OPTGROUP_NONE, /* optinfo_flags */
    1456              :   TV_NONE, /* tv_id */
    1457              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1458              :   0, /* properties_provided */
    1459              :   0, /* properties_destroyed */
    1460              :   0, /* todo_flags_start */
    1461              :   0, /* todo_flags_finish */
    1462              : };
    1463              : 
    1464              : class pass_simduid_cleanup : public gimple_opt_pass
    1465              : {
    1466              : public:
    1467       597656 :   pass_simduid_cleanup (gcc::context *ctxt)
    1468      1195312 :     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
    1469              :   {}
    1470              : 
    1471              :   /* opt_pass methods: clone () returns a new instance for the same context; the gate runs the pass only when FUN->has_simduid_loops is set.  */
    1472       298828 :   opt_pass * clone () final override
    1473              :   {
    1474       298828 :     return new pass_simduid_cleanup (m_ctxt);
    1475              :   }
    1476      2528978 :   bool gate (function *fun) final override { return fun->has_simduid_loops; }
    1477              :   unsigned int execute (function *) final override;
    1478              : 
    1479              : }; // class pass_simduid_cleanup
    1480              : /* Fold the IFN_GOMP_SIMD_* builtins in FUN and shrink "omp array simd" arrays, passing NULL in place of a simduid-to-VF table; clears FUN->has_simduid_loops and always returns 0.  */
    1481              : unsigned int
    1482         2208 : pass_simduid_cleanup::execute (function *fun)
    1483              : {
    1484         2208 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1485              : 
    1486         2208 :   note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1487              : 
    1488              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1489         2208 :   adjust_simduid_builtins (NULL, fun);
    1490              : 
    1491              :   /* Shrink any "omp array simd" temporary arrays to the
    1492              :      actual vectorization factors.  */
    1493         2208 :   if (simd_array_to_simduid_htab)
    1494            4 :     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
    1495         2208 :   fun->has_simduid_loops = false;
    1496         2208 :   return 0;
    1497              : }
    1498              : 
    1499              : }  // anon namespace
    1500              : /* Create a new instance of the simduid cleanup pass for CTXT.  */
    1501              : gimple_opt_pass *
    1502       298828 : make_pass_simduid_cleanup (gcc::context *ctxt)
    1503              : {
    1504       298828 :   return new pass_simduid_cleanup (ctxt);
    1505              : }
    1506              : 
    1507              : 
    1508              : /*  Entry point to basic block SLP phase.  */
    1509              : 
    1510              : namespace {
    1511              : 
    1512              : const pass_data pass_data_slp_vectorize =
    1513              : {
    1514              :   GIMPLE_PASS, /* type */
    1515              :   "slp", /* name */
    1516              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1517              :   TV_TREE_SLP_VECTORIZATION, /* tv_id */
    1518              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1519              :   0, /* properties_provided */
    1520              :   0, /* properties_destroyed */
    1521              :   0, /* todo_flags_start */
    1522              :   TODO_update_ssa, /* todo_flags_finish */
    1523              : };
    1524              : 
    1525              : class pass_slp_vectorize : public gimple_opt_pass
    1526              : {
    1527              : public:
    1528       597656 :   pass_slp_vectorize (gcc::context *ctxt)
    1529      1195312 :     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
    1530              :   {}
    1531              : 
    1532              :   /* opt_pass methods: clone () returns a new instance for the same context; the pass is gated on flag_tree_slp_vectorize being nonzero.  */
    1533       298828 :   opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
    1534      1039824 :   bool gate (function *) final override { return flag_tree_slp_vectorize != 0; }
    1535              :   unsigned int execute (function *) final override;
    1536              : 
    1537              : }; // class pass_slp_vectorize
    1538              : /* Run basic-block SLP vectorization on FUN.  Loop structures and SCEV are initialized and finalized here only when SCEV is not already initialized on entry.  Always returns 0; TODO_update_ssa is requested through todo_flags_finish in pass_data_slp_vectorize.  */
    1539              : unsigned int
    1540       905907 : pass_slp_vectorize::execute (function *fun)
    1541              : {
    1542       905907 :   auto_purge_vect_location sentinel;
    1543       905907 :   basic_block bb;
    1544              : 
    1545       905907 :   bool in_loop_pipeline = scev_initialized_p ();
    1546       905907 :   if (!in_loop_pipeline)
    1547              :     {
    1548       700016 :       loop_optimizer_init (LOOPS_NORMAL);
    1549       700016 :       scev_initialize ();
    1550              :     }
    1551              : 
    1552              :   /* Mark all stmts as not belonging to the current region and unvisited.  */
    1553     11393593 :   FOR_EACH_BB_FN (bb, fun)
    1554              :     {
    1555     15156288 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    1556      4668602 :            gsi_next (&gsi))
    1557              :         {
    1558      4668602 :           gphi *stmt = gsi.phi ();
    1559      4668602 :           gimple_set_uid (stmt, -1);
    1560      4668602 :           gimple_set_visited (stmt, false);
    1561              :         }
    1562     98624488 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    1563     77649116 :            gsi_next (&gsi))
    1564              :         {
    1565     77649116 :           gimple *stmt = gsi_stmt (gsi);
    1566     77649116 :           gimple_set_uid (stmt, -1);
    1567     77649116 :           gimple_set_visited (stmt, false);
    1568              :         }
    1569              :     }
    1570              : 
    1571       905907 :   vect_slp_init ();
    1572              : 
    1573       905907 :   vect_slp_function (fun);
    1574              : 
    1575       905907 :   vect_slp_fini ();
    1576              : 
    1577       905907 :   if (!in_loop_pipeline)
    1578              :     {
    1579       700016 :       scev_finalize ();
    1580       700016 :       loop_optimizer_finalize ();
    1581              :     }
    1582              : 
    1583      1811814 :   return 0;
    1584       905907 : }
    1585              : 
    1586              : } // anon namespace
    1587              : /* Create a new instance of the basic block SLP vectorization pass for CTXT.  */
    1588              : gimple_opt_pass *
    1589       298828 : make_pass_slp_vectorize (gcc::context *ctxt)
    1590              : {
    1591       298828 :   return new pass_slp_vectorize (ctxt);
    1592              : }
    1593              : 
    1594              : 
    1595              : /* Increase alignment of global arrays to improve vectorization potential.
    1596              :    TODO:
    1597              :    - Consider also structs that have an array field.
    1598              :    - Use ipa analysis to prune arrays that can't be vectorized?
    1599              :      This should involve global alignment analysis and in the future also
    1600              :      array padding.  */
    1601              : /* Forward declaration (mutually recursive with get_vec_alignment_for_record_type), and the cache of record-type alignments that increase_alignment allocates and deletes.  */
    1602              : static unsigned get_vec_alignment_for_type (tree);
    1603              : static hash_map<tree, unsigned> *type_align_map;
    1604              : 
    1605              : /* Return alignment of array's vector type corresponding to scalar type.
    1606              :    0 if no vector type exists, if either the array size or the vector size is not a poly_int constant, or if the array may be smaller than the vector.  */
    1607              : static unsigned
    1608            0 : get_vec_alignment_for_array_type (tree type)
    1609              : {
    1610            0 :   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
    1611            0 :   poly_uint64 array_size, vector_size;
    1612              : 
    1613            0 :   tree scalar_type = strip_array_types (type);
    1614            0 :   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
    1615            0 :   if (!vectype
    1616            0 :       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
    1617            0 :       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
    1618            0 :       || maybe_lt (array_size, vector_size))
    1619            0 :     return 0;
    1620              : 
    1621            0 :   return TYPE_ALIGN (vectype);
    1622              : }
    1623              : 
    1624              : /* Return alignment of field having maximum alignment of vector type
    1625              :    corresponding to its scalar type.  For now, we only consider fields whose
    1626              :    offset is a multiple of its vector alignment.
    1627              :    0 if no suitable field is found or TYPE is packed.  Results for non-packed types are cached in type_align_map.  */
    1628              : static unsigned
    1629            0 : get_vec_alignment_for_record_type (tree type)
    1630              : {
    1631            0 :   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
    1632              : 
    1633            0 :   unsigned max_align = 0, alignment;
    1634            0 :   HOST_WIDE_INT offset;
    1635            0 :   tree offset_tree;
    1636              : 
    1637            0 :   if (TYPE_PACKED (type))
    1638              :     return 0;
    1639              : 
    1640            0 :   unsigned *slot = type_align_map->get (type);
    1641            0 :   if (slot)
    1642            0 :     return *slot;
    1643              : 
    1644            0 :   for (tree field = first_field (type);
    1645            0 :        field != NULL_TREE;
    1646            0 :        field = DECL_CHAIN (field))
    1647              :     {
    1648              :       /* Skip if not FIELD_DECL or if alignment is set by user.  */
    1649            0 :       if (TREE_CODE (field) != FIELD_DECL
    1650            0 :           || DECL_USER_ALIGN (field)
    1651            0 :           || DECL_ARTIFICIAL (field))
    1652            0 :         continue;
    1653              : 
    1654              :       /* We don't need to process the type further if offset is variable,
    1655              :          since the offsets of remaining members will also be variable.  */
    1656            0 :       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
    1657            0 :           || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
    1658              :         break;
    1659              : 
    1660              :       /* Similarly stop processing the type if offset_tree
    1661              :          does not fit in unsigned HOST_WIDE_INT.  */
    1662            0 :       offset_tree = bit_position (field);
    1663            0 :       if (!tree_fits_uhwi_p (offset_tree))
    1664              :         break;
    1665              : 
    1666            0 :       offset = tree_to_uhwi (offset_tree);
    1667            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
    1668              : 
    1669              :       /* Get maximum alignment of vectorized field/array among those members
    1670              :          whose offset is multiple of the vector alignment.  */
    1671            0 :       if (alignment
    1672            0 :           && (offset % alignment == 0)
    1673            0 :           && (alignment > max_align))
    1674            0 :         max_align = alignment;
    1675              :     }
    1676              : 
    1677            0 :   type_align_map->put (type, max_align);
    1678            0 :   return max_align;
    1679              : }
    1680              : 
    1681              : /* Return alignment of vector type corresponding to TYPE's scalar type
    1682              :    or 0 if it doesn't exist or the vector alignment is not greater than
    1683              :    TYPE's own alignment.  Also returns 0 for a NULL_TREE TYPE.  */
    1684              : static unsigned
    1685            0 : get_vec_alignment_for_type (tree type)
    1686              : {
    1687            0 :   if (type == NULL_TREE)
    1688              :     return 0;
    1689              : 
    1690            0 :   gcc_assert (TYPE_P (type));
    1691              :   /* NOTE(review): ALIGNMENT is static although every switch arm assigns it before it is read; the storage class looks unnecessary - confirm.  */
    1692            0 :   static unsigned alignment = 0;
    1693            0 :   switch (TREE_CODE (type))
    1694              :     {
    1695            0 :       case ARRAY_TYPE:
    1696            0 :         alignment = get_vec_alignment_for_array_type (type);
    1697            0 :         break;
    1698            0 :       case RECORD_TYPE:
    1699            0 :         alignment = get_vec_alignment_for_record_type (type);
    1700            0 :         break;
    1701            0 :       default:
    1702            0 :         alignment = 0;
    1703            0 :         break;
    1704              :     }
    1705              : 
    1706            0 :   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
    1707              : }
    1708              : 
    1709              : /* Entry point to increase_alignment pass.  Walks every defined variable, skips those whose alignment cannot be increased or that are user-aligned or artificial, and raises the alignment of the rest when a larger vector alignment applies.  Always returns 0.  */
    1710              : static unsigned int
    1711            0 : increase_alignment (void)
    1712              : {
    1713            0 :   varpool_node *vnode;
    1714              : 
    1715            0 :   vect_location = dump_user_location_t ();
    1716            0 :   type_align_map = new hash_map<tree, unsigned>;
    1717              : 
    1718              :   /* Increase the alignment of all global arrays for vectorization.  */
    1719            0 :   FOR_EACH_DEFINED_VARIABLE (vnode)
    1720              :     {
    1721            0 :       tree decl = vnode->decl;
    1722            0 :       unsigned int alignment;
    1723              : 
    1724            0 :       if ((decl_in_symtab_p (decl)
    1725            0 :           && !symtab_node::get (decl)->can_increase_alignment_p ())
    1726            0 :           || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
    1727            0 :         continue;
    1728              : 
    1729            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
    1730            0 :       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
    1731              :         {
    1732            0 :           vnode->increase_alignment (alignment);
    1733            0 :           if (dump_enabled_p ())
    1734            0 :             dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
    1735              :         }
    1736              :     }
    1737              : 
    1738            0 :   delete type_align_map;
    1739            0 :   return 0;
    1740              : }
    1741              : 
    1742              : 
    1743              : namespace {
    1744              : 
    1745              : const pass_data pass_data_ipa_increase_alignment =
    1746              : {
    1747              :   SIMPLE_IPA_PASS, /* type */
    1748              :   "increase_alignment", /* name */
    1749              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1750              :   TV_IPA_OPT, /* tv_id */
    1751              :   0, /* properties_required */
    1752              :   0, /* properties_provided */
    1753              :   0, /* properties_destroyed */
    1754              :   0, /* todo_flags_start */
    1755              :   0, /* todo_flags_finish */
    1756              : };
    1757              : 
    1758              : class pass_ipa_increase_alignment : public simple_ipa_opt_pass
    1759              : {
    1760              : public:
    1761       298828 :   pass_ipa_increase_alignment (gcc::context *ctxt)
    1762       597656 :     : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
    1763              :   {}
    1764              : 
    1765              :   /* opt_pass methods: the pass runs only when both flag_section_anchors and flag_tree_loop_vectorize are set; execute () simply forwards to increase_alignment ().  */
    1766       238720 :   bool gate (function *) final override
    1767              :     {
    1768       238720 :       return flag_section_anchors && flag_tree_loop_vectorize;
    1769              :     }
    1770              : 
    1771            0 :   unsigned int execute (function *) final override
    1772              :   {
    1773            0 :     return increase_alignment ();
    1774              :   }
    1775              : 
    1776              : }; // class pass_ipa_increase_alignment
    1777              : 
    1778              : } // anon namespace
    1779              : 
                        /* Return a pass_ipa_increase_alignment object allocated with operator
                           new and constructed with CTXT.
                           NOTE(review): nothing here frees the object, so ownership presumably
                           passes to the caller -- confirm.  */
    1780              : simple_ipa_opt_pass *
    1781       298828 : make_pass_ipa_increase_alignment (gcc::context *ctxt)
    1782              : {
    1783       298828 :   return new pass_ipa_increase_alignment (ctxt);
    1784              : }
    1785              : 
    1786              : /* If the condition represented by T is a comparison or the SSA name
    1787              :    result of a comparison, extract the comparison's operands.  Represent
    1788              :    T as NE_EXPR <T, 0> otherwise.
                           The result is stored in this->code, this->op0, this->op1 and
                           this->inverted_p.  inverted_p is set to true only when T is an SSA
                           name defined as the BIT_NOT_EXPR of a value that is itself defined by
                           a comparison; in that case the recorded code and operands are those
                           of the inner comparison.  In every other case it is set to false.  */
    1789              : 
    1790              : void
    1791        63801 : scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
    1792              : {
                          /* Case 1: T is itself a comparison tree, so take its code and
                             operands directly.  */
    1793        63801 :   if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    1794              :     {
    1795            0 :       this->code = TREE_CODE (t);
    1796            0 :       this->op0 = TREE_OPERAND (t, 0);
    1797            0 :       this->op1 = TREE_OPERAND (t, 1);
    1798            0 :       this->inverted_p = false;
    1799            0 :       return;
    1800              :     }
    1801              : 
                          /* Case 2: T is an SSA name.  Look at the statement that defines it,
                             but only if that statement is a gassign.  */
    1802        63801 :   if (TREE_CODE (t) == SSA_NAME)
    1803        26230 :     if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
    1804              :       {
    1805        26230 :         tree_code code = gimple_assign_rhs_code (stmt);
    1806        26230 :         if (TREE_CODE_CLASS (code) == tcc_comparison)
    1807              :           {
                                    /* T = OP0 <cmp> OP1.  */
    1808        17859 :             this->code = code;
    1809        17859 :             this->op0 = gimple_assign_rhs1 (stmt);
    1810        17859 :             this->op1 = gimple_assign_rhs2 (stmt);
    1811        17859 :             this->inverted_p = false;
    1812        17859 :             return;
    1813              :           }
    1814         8371 :         else if (code == BIT_NOT_EXPR)
    1815              :           {
                                    /* T = ~N_OP.  If N_OP is in turn defined by a comparison,
                                       record that comparison and flag the inversion.
                                       NOTE(review): SSA_NAME_DEF_STMT is applied to N_OP without
                                       a visible check that it is an SSA_NAME -- presumably
                                       guaranteed elsewhere; confirm.  */
    1816         3541 :             tree n_op = gimple_assign_rhs1 (stmt);
    1817         3541 :             if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
    1818              :               {
    1819         3541 :                 code = gimple_assign_rhs_code (stmt);
    1820         3541 :                 if (TREE_CODE_CLASS (code) == tcc_comparison)
    1821              :                   {
    1822         3503 :                     this->code = code;
    1823         3503 :                     this->op0 = gimple_assign_rhs1 (stmt);
    1824         3503 :                     this->op1 = gimple_assign_rhs2 (stmt);
    1825         3503 :                     this->inverted_p = true;
    1826         3503 :                     return;
    1827              :                   }
    1828              :               }
    1829              :           }
    1830              :       }
    1831              : 
                          /* Fallback: none of the cases above returned, so represent the
                             condition as T != 0, using a zero constant of T's type.  */
    1832        42439 :   this->code = NE_EXPR;
    1833        42439 :   this->op0 = t;
    1834        42439 :   this->op1 = build_zero_cst (TREE_TYPE (t));
    1835        42439 :   this->inverted_p = false;
    1836              : }
    1837              : 
    1838              : /* See the comment above the declaration for details.
                           This implementation multiplies
                           builtin_vectorization_cost (KIND, VECTYPE, MISALIGN) by COUNT, passes
                           the product to record_stmt_cost together with STMT_INFO and WHERE,
                           and returns whatever record_stmt_cost returns.  The slp_tree argument
                           is unnamed and unused.  */
    1839              : 
    1840              : unsigned int
    1841            0 : vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
    1842              :                              stmt_vec_info stmt_info, slp_tree,
    1843              :                              tree vectype, int misalign,
    1844              :                              vect_cost_model_location where)
    1845              : {
    1846            0 :   unsigned int cost
    1847            0 :     = builtin_vectorization_cost (kind, vectype, misalign) * count;
    1848            0 :   return record_stmt_cost (stmt_info, where, cost);
    1849              : }
    1850              : 
                        /* Pass every entry of COST_VEC to the global ::add_stmt_cost for this
                           cost object, forwarding the entry's count, kind, stmt_info, node,
                           vectype, misalign and where fields.  Return the sum of the values
                           those calls return.  The slp_tree argument is unnamed and unused;
                           each entry supplies its own node.  */
    1851              : unsigned int
    1852      2620063 : vector_costs::add_slp_cost (slp_tree,
    1853              :                             const array_slice<stmt_info_for_cost> &cost_vec)
    1854              : {
    1855      2620063 :   unsigned int sum = 0;
    1856      5819165 :   for (auto item : cost_vec)
    1857      3199102 :     sum += ::add_stmt_cost (this, item.count, item.kind, item.stmt_info,
    1858              :                             item.node, item.vectype, item.misalign, item.where);
    1859      2620063 :   return sum;
    1860              : }
    1861              : 
    1862              : /* See the comment above the declaration for details.
                           This implementation only marks the object as finished: it asserts
                           that m_finished is not already set and then sets it.  The
                           vector_costs argument is unnamed and unused.  */
    1863              : 
    1864              : void
    1865      1807703 : vector_costs::finish_cost (const vector_costs *)
    1866              : {
    1867      1807703 :   gcc_assert (!m_finished);
    1868      1807703 :   m_finished = true;
    1869      1807703 : }
    1870              : 
    1871              : /* Record a base cost of COST units against WHERE.  If STMT_INFO is
    1872              :    nonnull, use it to adjust the cost based on execution frequency
    1873              :    (where appropriate).
                           The adjustment is done by adjust_cost_for_freq.  The adjusted value is
                           added to m_costs[WHERE] and is also the return value.  */
    1874              : 
    1875              : unsigned int
    1876            0 : vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
    1877              :                                 vect_cost_model_location where,
    1878              :                                 unsigned int cost)
    1879              : {
    1880            0 :   cost = adjust_cost_for_freq (stmt_info, where, cost);
    1881            0 :   m_costs[where] += cost;
    1882            0 :   return cost;
    1883              : }
    1884              : 
    1885              : /* COST is the base cost we have calculated for an operation in location WHERE.
    1886              :    If STMT_INFO is nonnull, use it to adjust the cost based on execution
    1887              :    frequency (where appropriate).  Return the adjusted cost.
                           COST is scaled only when WHERE is vect_body, STMT_INFO is nonnull and
                           stmt_in_inner_loop_p (m_vinfo, STMT_INFO) holds; it is then multiplied
                           by LOOP_VINFO_INNER_LOOP_COST_FACTOR.  Otherwise COST is returned
                           unchanged.  */
    1888              : 
    1889              : unsigned int
    1890      7470412 : vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
    1891              :                                     vect_cost_model_location where,
    1892              :                                     unsigned int cost)
    1893              : {
    1894              :   /* Statements in an inner loop relative to the loop being
    1895              :      vectorized are weighted more heavily.  The value here is
    1896              :      arbitrary and could potentially be improved with analysis.  */
    1897      7470412 :   if (where == vect_body
    1898      7470412 :       && stmt_info
    1899      7470412 :       && stmt_in_inner_loop_p (m_vinfo, stmt_info))
    1900              :     {
                              /* as_a converts m_vinfo to a loop_vec_info so that the inner-loop
                                 cost factor can be read from it.  */
    1901        11885 :       loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
    1902        11885 :       cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
    1903              :     }
    1904      7470412 :   return cost;
    1905              : }
    1906              : 
    1907              : /* See the comment above the declaration for details.
                           This implementation first asks compare_inside_loop_cost; if that is
                           nonzero the answer is whether it is negative.  Otherwise it asks
                           compare_outside_loop_cost in the same way.  If both return zero the
                           result is false.  */
    1908              : 
    1909              : bool
    1910        30975 : vector_costs::better_main_loop_than_p (const vector_costs *other) const
    1911              : {
    1912        30975 :   int diff = compare_inside_loop_cost (other);
    1913        30975 :   if (diff != 0)
    1914        30809 :     return diff < 0;
    1915              : 
    1916              :   /* If there's nothing to choose between the loop bodies, see whether
    1917              :      there's a difference in the prologue and epilogue costs.  */
    1918          166 :   diff = compare_outside_loop_cost (other);
    1919          166 :   if (diff != 0)
    1920          138 :     return diff < 0;
    1921              : 
                          /* Neither comparison could decide, so THIS is not considered
                             better.  */
    1922              :   return false;
    1923              : }
    1924              : 
    1925              : 
    1926              : /* See the comment above the declaration for details.
                           This implementation estimates, for THIS and for OTHER, how many times
                           the epilogue loop would iterate after MAIN_LOOP (the "factor").  It
                           computes body_cost () * factor + outside_cost () for each and returns
                           true if the total for THIS is strictly smaller than that for OTHER.  */
    1927              : 
    1928              : bool
    1929         1409 : vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
    1930              :                                            loop_vec_info main_loop) const
    1931              : {
    1932         1409 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    1933         1409 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    1934              : 
    1935         1409 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    1936         1409 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    1937              : 
    1938         1409 :   poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
    1939         1409 :   unsigned HOST_WIDE_INT main_vf;
    1940         1409 :   unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
    1941              :   /* If we can determine how many iterations are left for the epilogue
    1942              :      loop, that is if both the main loop's vectorization factor and number
    1943              :      of iterations are constant, then we use them to calculate the cost of
    1944              :      the epilogue loop together with a 'likely value' for the epilogue's
    1945              :      vectorization factor.  Otherwise we use the main loop's vectorization
    1946              :      factor and the maximum poly value for the epilogue's.  If the target
    1947              :      has not provided a sensible upper bound, poly vectorization
    1948              :      factors are likely to be favored over constant ones.  */
    1949         1409 :   if (main_poly_vf.is_constant (&main_vf)
    1950         1409 :       && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    1951              :     {
                              /* Scalar iterations left over once the main loop has consumed
                                 whole multiples of its constant VF.  */
    1952           94 :       unsigned HOST_WIDE_INT niters
    1953           94 :         = LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
    1954           94 :       HOST_WIDE_INT other_likely_vf
    1955           94 :         = estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
    1956           94 :       HOST_WIDE_INT this_likely_vf
    1957           94 :         = estimated_poly_value (this_vf, POLY_VALUE_LIKELY);
    1958              : 
    1959              :       /* If the epilogue is using partial vectors we account for the
    1960              :          partial iteration here too.  */
    1961           94 :       other_factor = niters / other_likely_vf;
    1962           94 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
    1963            0 :           && niters % other_likely_vf != 0)
    1964            0 :         other_factor++;
    1965              : 
    1966           94 :       this_factor = niters / this_likely_vf;
    1967           94 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
    1968            0 :           && niters % this_likely_vf != 0)
    1969            0 :         this_factor++;
    1970              :     }
    1971              :   else
    1972              :     {
                              /* Iteration count unknown or main VF not constant: work from the
                                 POLY_VALUE_MAX estimates of all three VFs instead.  */
    1973         1315 :       unsigned HOST_WIDE_INT main_vf_max
    1974         1315 :         = estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
    1975         1315 :       unsigned HOST_WIDE_INT other_vf_max
    1976         1315 :         = estimated_poly_value (other_vf, POLY_VALUE_MAX);
    1977         1315 :       unsigned HOST_WIDE_INT this_vf_max
    1978         1315 :         = estimated_poly_value (this_vf, POLY_VALUE_MAX);
    1979              : 
    1980         1315 :       other_factor = CEIL (main_vf_max, other_vf_max);
    1981         1315 :       this_factor = CEIL (main_vf_max, this_vf_max);
    1982              : 
    1983              :       /* If the loop is not using partial vectors then it will iterate one
    1984              :          time less than one that does.  It is safe to subtract one here,
    1985              :          because the main loop's vf is always at least 2x bigger than that
    1986              :          of an epilogue.  */
    1987         1315 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
    1988         1302 :         other_factor -= 1;
    1989         1315 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
    1990         1313 :         this_factor -= 1;
    1991              :     }
    1992              : 
    1993              :   /* Compute the costs by multiplying the inside costs with the factor and
    1994              :      add the outside costs for a more complete picture.  The factor is the
    1995              :      amount of times we are expecting to iterate this epilogue.  */
    1996         1409 :   other_cost = other->body_cost () * other_factor;
    1997         1409 :   this_cost = this->body_cost () * this_factor;
    1998         1409 :   other_cost += other->outside_cost ();
    1999         1409 :   this_cost += this->outside_cost ();
                          /* Strict comparison: equal totals mean THIS is not better.  */
    2000         1409 :   return this_cost < other_cost;
    2001              : }
    2002              : 
    2003              : /* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
    2004              :    determine the return value of better_main_loop_than_p by comparing the
    2005              :    inside (loop body) costs of THIS and OTHER.  Return:
    2006              : 
    2007              :    * -1 if better_main_loop_than_p should return true.
    2008              :    * 1 if better_main_loop_than_p should return false.
    2009              :    * 0 if we can't decide.  */
    2010              : 
    2011              : int
    2012        30975 : vector_costs::compare_inside_loop_cost (const vector_costs *other) const
    2013              : {
    2014        30975 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    2015        30975 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    2016              : 
                          /* Both candidates must describe the same loop.  */
    2017        30975 :   struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
    2018        30975 :   gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);
    2019              : 
    2020        30975 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    2021        30975 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    2022              : 
    2023              :   /* Limit the VFs to what is likely to be the maximum number of iterations,
    2024              :      to handle cases in which at least one loop_vinfo is fully-masked.  */
    2025        30975 :   HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
                          /* A result of -1 is treated as "no estimate", so no clamping is
                             done.  */
    2026        30975 :   if (estimated_max_niter != -1)
    2027              :     {
    2028        29883 :       if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
    2029              :           >= estimated_max_niter)
    2030              :         this_vf = estimated_max_niter;
    2031        29883 :       if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
    2032              :           >= estimated_max_niter)
    2033              :         other_vf = estimated_max_niter;
    2034              :     }
    2035              : 
    2036              :   /* Check whether the (fractional) cost per scalar iteration is lower or
    2037              :      higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
                          /* The quotients are compared through their cross products:
                             REL_THIS is this body cost times OTHER's VF and REL_OTHER is
                             OTHER's body cost times this VF.  */
    2038        30975 :   poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
    2039        30975 :   poly_int64 rel_other
    2040        30975 :     = other_loop_vinfo->vector_costs->body_cost () * this_vf;
    2041              : 
    2042        30975 :   HOST_WIDE_INT est_rel_this_min
    2043        30975 :     = estimated_poly_value (rel_this, POLY_VALUE_MIN);
    2044        30975 :   HOST_WIDE_INT est_rel_this_max
    2045        30975 :     = estimated_poly_value (rel_this, POLY_VALUE_MAX);
    2046              : 
    2047        30975 :   HOST_WIDE_INT est_rel_other_min
    2048        30975 :     = estimated_poly_value (rel_other, POLY_VALUE_MIN);
    2049        30975 :   HOST_WIDE_INT est_rel_other_max
    2050        30975 :     = estimated_poly_value (rel_other, POLY_VALUE_MAX);
    2051              : 
    2052              :   /* Check first if we can make out an unambiguous total order from the minimum
    2053              :      and maximum estimates.  */
    2054        30975 :   if (est_rel_this_min < est_rel_other_min
    2055              :       && est_rel_this_max < est_rel_other_max)
    2056              :     return -1;
    2057              : 
    2058        29935 :   if (est_rel_other_min < est_rel_this_min
    2059              :       && est_rel_other_max < est_rel_this_max)
    2060        29769 :     return 1;
    2061              : 
    2062              :   /* When other_loop_vinfo uses a variable vectorization factor,
    2063              :      we know that it has a lower cost for at least one runtime VF.
    2064              :      However, we don't know how likely that VF is.
    2065              : 
    2066              :      One option would be to compare the costs for the estimated VFs.
    2067              :      The problem is that that can put too much pressure on the cost
    2068              :      model.  E.g. if the estimated VF is also the lowest possible VF,
    2069              :      and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
    2070              :      for the estimated VF, we'd then choose this_loop_vinfo even
    2071              :      though (a) this_loop_vinfo might not actually be better than
    2072              :      other_loop_vinfo for that VF and (b) it would be significantly
    2073              :      worse at larger VFs.
    2074              : 
    2075              :      Here we go for a hacky compromise: pick this_loop_vinfo if it is
    2076              :      no more expensive than other_loop_vinfo even after doubling the
    2077              :      estimated other_loop_vinfo VF.  For all but trivial loops, this
    2078              :      ensures that we only pick this_loop_vinfo if it is significantly
    2079              :      better than other_loop_vinfo at the estimated VF.  */
    2080              :   if (est_rel_other_min != est_rel_this_min
    2081              :       || est_rel_other_max != est_rel_this_max)
    2082              :     {
    2083              :       HOST_WIDE_INT est_rel_this_likely
    2084              :         = estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
    2085              :       HOST_WIDE_INT est_rel_other_likely
    2086              :         = estimated_poly_value (rel_other, POLY_VALUE_LIKELY);
    2087              : 
                              /* This branch always decides: -1 if twice the likely REL_THIS is
                                 still no larger than the likely REL_OTHER, otherwise 1.  */
    2088              :       return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    2089              :     }
    2090              : 
                          /* The minimum estimates are equal and the maximum estimates are
                             equal, so the body costs cannot decide.  */
    2091              :   return 0;
    2092              : }
    2093              : 
    2094              : /* A <=>-style subroutine of better_main_loop_than_p, used when there is
    2095              :    nothing to choose between the inside (loop body) costs of THIS and OTHER.
    2096              :    Check whether we can determine the return value of better_main_loop_than_p
    2097              :    by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
    2098              :    Return:
    2099              : 
    2100              :    * -1 if better_main_loop_than_p should return true.
    2101              :    * 1 if better_main_loop_than_p should return false.
    2102              :    * 0 if we can't decide.  */
    2103              : 
    2104              : int
    2105          166 : vector_costs::compare_outside_loop_cost (const vector_costs *other) const
    2106              : {
                          /* A plain three-way comparison of the two outside_cost () values: the
                             lower cost wins and equal costs give 0.  */
    2107          166 :   auto this_outside_cost = this->outside_cost ();
    2108          166 :   auto other_outside_cost = other->outside_cost ();
    2109          166 :   if (this_outside_cost != other_outside_cost)
    2110          138 :     return this_outside_cost < other_outside_cost ? -1 : 1;
    2111              : 
    2112              :   return 0;
    2113              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.