LCOV - code coverage report
Current view: top level - gcc - tree-vectorizer.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.8 % 931 827
Test Date: 2026-05-11 19:44:49 Functions: 89.1 % 64 57
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : /* Loop and basic block vectorizer.
      22              : 
      23              :   This file contains drivers for the three vectorizers:
      24              :   (1) loop vectorizer (inter-iteration parallelism),
      25              :   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      26              :       vectorizer)
      27              :   (3) BB vectorizer (out-of-loops), aka SLP
      28              : 
      29              :   The rest of the vectorizer's code is organized as follows:
      30              :   - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
      31              :     used by drivers (1) and (2).
      32              :   - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
      33              :     drivers (1) and (2).
      34              :   - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
      35              :     used by drivers (2) and (3).
      36              :   - tree-vect-stmts.cc - statements analysis and transformation (used by all).
      37              :   - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
      38              :     manipulations (used by all).
      39              :   - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
      40              : 
      41              :   Here's a poor attempt at illustrating that:
      42              : 
      43              :      tree-vectorizer.cc:
      44              :      loop_vect()  loop_aware_slp()  slp_vect()
      45              :           |        /           \          /
      46              :           |       /             \        /
      47              :           tree-vect-loop.cc  tree-vect-slp.cc
      48              :                 | \      \  /      /   |
      49              :                 |  \      \/      /    |
      50              :                 |   \     /\     /     |
      51              :                 |    \   /  \   /      |
      52              :          tree-vect-stmts.cc  tree-vect-data-refs.cc
      53              :                        \      /
      54              :                     tree-vect-patterns.cc
      55              : */
      56              : 
      57              : #include "config.h"
      58              : #include "system.h"
      59              : #include "coretypes.h"
      60              : #include "backend.h"
      61              : #include "tree.h"
      62              : #include "gimple.h"
      63              : #include "predict.h"
      64              : #include "tree-pass.h"
      65              : #include "ssa.h"
      66              : #include "cgraph.h"
      67              : #include "fold-const.h"
      68              : #include "stor-layout.h"
      69              : #include "gimple-iterator.h"
      70              : #include "gimple-walk.h"
      71              : #include "tree-ssa-loop-manip.h"
      72              : #include "tree-ssa-loop-niter.h"
      73              : #include "tree-cfg.h"
      74              : #include "cfgloop.h"
      75              : #include "tree-vectorizer.h"
      76              : #include "tree-ssa-propagate.h"
      77              : #include "dbgcnt.h"
      78              : #include "tree-scalar-evolution.h"
      79              : #include "stringpool.h"
      80              : #include "attribs.h"
      81              : #include "gimple-pretty-print.h"
      82              : #include "opt-problem.h"
      83              : #include "internal-fn.h"
      84              : #include "tree-ssa-sccvn.h"
      85              : #include "tree-into-ssa.h"
      86              : #include "gimple-range.h"
      87              : 
      88              : /* Loop or bb location, with hotness information.  */
      89              : dump_user_location_t vect_location;
      90              : 
      91              : /* auto_purge_vect_location's dtor: reset the vect_location
      92              :    global, to avoid stale location_t values that could reference
      93              :    GC-ed blocks.  */
      94              : 
      95      1370994 : auto_purge_vect_location::~auto_purge_vect_location ()
      96              : {
      97      1370994 :   vect_location = dump_user_location_t ();
      98      1370994 : }
      99              : 
     100              : /* Dump a cost entry according to args to F.  */
     101              : 
     102              : void
     103       218578 : dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
     104              :                 stmt_vec_info stmt_info, slp_tree node, tree,
     105              :                 int misalign, unsigned cost,
     106              :                 enum vect_cost_model_location where)
     107              : {
     108       218578 :   if (stmt_info)
     109              :     {
     110       201261 :       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
     111       201261 :       fprintf (f, " ");
     112              :     }
     113        17317 :   else if (node)
     114         3457 :     fprintf (f, "node %p ", (void *)node);
     115              :   else
     116        13860 :     fprintf (f, "<unknown> ");
     117       218578 :   fprintf (f, "%d times ", count);
     118       218578 :   const char *ks = "unknown";
     119       218578 :   switch (kind)
     120              :     {
     121        49214 :     case scalar_stmt:
     122        49214 :       ks = "scalar_stmt";
     123        49214 :       break;
     124        37052 :     case scalar_load:
     125        37052 :       ks = "scalar_load";
     126        37052 :       break;
     127        27930 :     case scalar_store:
     128        27930 :       ks = "scalar_store";
     129        27930 :       break;
     130        34777 :     case vector_stmt:
     131        34777 :       ks = "vector_stmt";
     132        34777 :       break;
     133        22008 :     case vector_load:
     134        22008 :       ks = "vector_load";
     135        22008 :       break;
     136            0 :     case vector_gather_load:
     137            0 :       ks = "vector_gather_load";
     138            0 :       break;
     139         7846 :     case unaligned_load:
     140         7846 :       ks = "unaligned_load";
     141         7846 :       break;
     142         4648 :     case unaligned_store:
     143         4648 :       ks = "unaligned_store";
     144         4648 :       break;
     145         8881 :     case vector_store:
     146         8881 :       ks = "vector_store";
     147         8881 :       break;
     148            0 :     case vector_scatter_store:
     149            0 :       ks = "vector_scatter_store";
     150            0 :       break;
     151         3337 :     case vec_to_scalar:
     152         3337 :       ks = "vec_to_scalar";
     153         3337 :       break;
     154         9385 :     case scalar_to_vec:
     155         9385 :       ks = "scalar_to_vec";
     156         9385 :       break;
     157           12 :     case cond_branch_not_taken:
     158           12 :       ks = "cond_branch_not_taken";
     159           12 :       break;
     160          544 :     case cond_branch_taken:
     161          544 :       ks = "cond_branch_taken";
     162          544 :       break;
     163         6432 :     case vec_perm:
     164         6432 :       ks = "vec_perm";
     165         6432 :       break;
     166         5408 :     case vec_promote_demote:
     167         5408 :       ks = "vec_promote_demote";
     168         5408 :       break;
     169         1104 :     case vec_construct:
     170         1104 :       ks = "vec_construct";
     171         1104 :       break;
     172              :     }
     173       218578 :   fprintf (f, "%s ", ks);
     174       218578 :   if (kind == unaligned_load || kind == unaligned_store)
     175        12494 :     fprintf (f, "(misalign %d) ", misalign);
     176       218578 :   fprintf (f, "costs %u ", cost);
     177       218578 :   const char *ws = "unknown";
     178       218578 :   switch (where)
     179              :     {
     180        23184 :     case vect_prologue:
     181        23184 :       ws = "prologue";
     182        23184 :       break;
     183       187992 :     case vect_body:
     184       187992 :       ws = "body";
     185       187992 :       break;
     186         7402 :     case vect_epilogue:
     187         7402 :       ws = "epilogue";
     188         7402 :       break;
     189              :     }
     190       218578 :   fprintf (f, "in %s\n", ws);
     191       218578 : }
     192              : 
     193              : /* For mapping simduid to vectorization factor.  */
     194              : 
     195              : class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
     196              : {
     197              : public:
     198              :   unsigned int simduid;
     199              :   poly_uint64 vf;
     200              : 
     201              :   /* hash_table support.  */
     202              :   static inline hashval_t hash (const simduid_to_vf *);
     203              :   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
     204              : };
     205              : 
     206              : inline hashval_t
     207         7776 : simduid_to_vf::hash (const simduid_to_vf *p)
     208              : {
     209         7776 :   return p->simduid;
     210              : }
     211              : 
     212              : inline int
     213        14243 : simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
     214              : {
     215        14243 :   return p1->simduid == p2->simduid;
     216              : }
     217              : 
     218              : /* This hash maps the OMP simd array to the corresponding simduid used
     219              :    to index into it.  Like thus,
     220              : 
     221              :         _7 = GOMP_SIMD_LANE (simduid.0)
     222              :         ...
     223              :         ...
     224              :         D.1737[_7] = stuff;
     225              : 
     226              : 
     227              :    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
     228              :    simduid.0.  */
     229              : 
     230              : struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
     231              : {
     232              :   tree decl;
     233              :   unsigned int simduid;
     234              : 
     235              :   /* hash_table support.  */
     236              :   static inline hashval_t hash (const simd_array_to_simduid *);
     237              :   static inline int equal (const simd_array_to_simduid *,
     238              :                            const simd_array_to_simduid *);
     239              : };
     240              : 
     241              : inline hashval_t
     242        24070 : simd_array_to_simduid::hash (const simd_array_to_simduid *p)
     243              : {
     244        24070 :   return DECL_UID (p->decl);
     245              : }
     246              : 
     247              : inline int
     248        16817 : simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
     249              :                               const simd_array_to_simduid *p2)
     250              : {
     251        16817 :   return p1->decl == p2->decl;
     252              : }
     253              : 
     254              : /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
     255              :    into their corresponding constants and remove
     256              :    IFN_GOMP_SIMD_ORDERED_{START,END}.  */
     257              : 
     258              : static void
     259         7809 : adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
     260              : {
     261         7809 :   basic_block bb;
     262              : 
     263       118627 :   FOR_EACH_BB_FN (bb, fun)
     264              :     {
     265       110818 :       gimple_stmt_iterator i;
     266              : 
     267       739164 :       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
     268              :         {
     269       517528 :           poly_uint64 vf = 1;
     270       517528 :           enum internal_fn ifn;
     271       517528 :           gimple *stmt = gsi_stmt (i);
     272       517528 :           tree t;
     273       517528 :           if (!is_gimple_call (stmt)
     274       517528 :               || !gimple_call_internal_p (stmt))
     275              :             {
     276       509965 :               gsi_next (&i);
     277       510881 :               continue;
     278              :             }
     279         7563 :           ifn = gimple_call_internal_fn (stmt);
     280         7563 :           switch (ifn)
     281              :             {
     282         6647 :             case IFN_GOMP_SIMD_LANE:
     283         6647 :             case IFN_GOMP_SIMD_VF:
     284         6647 :             case IFN_GOMP_SIMD_LAST_LANE:
     285         6647 :               break;
     286          158 :             case IFN_GOMP_SIMD_ORDERED_START:
     287          158 :             case IFN_GOMP_SIMD_ORDERED_END:
     288          158 :               if (integer_onep (gimple_call_arg (stmt, 0)))
     289              :                 {
     290            5 :                   enum built_in_function bcode
     291              :                     = (ifn == IFN_GOMP_SIMD_ORDERED_START
     292           10 :                        ? BUILT_IN_GOMP_ORDERED_START
     293              :                        : BUILT_IN_GOMP_ORDERED_END);
     294           10 :                   gimple *g
     295           10 :                     = gimple_build_call (builtin_decl_explicit (bcode), 0);
     296           10 :                   gimple_move_vops (g, stmt);
     297           10 :                   gsi_replace (&i, g, true);
     298           10 :                   continue;
     299           10 :                 }
     300          148 :               gsi_remove (&i, true);
     301          148 :               unlink_stmt_vdef (stmt);
     302          148 :               continue;
     303          758 :             default:
     304          758 :               gsi_next (&i);
     305          758 :               continue;
     306          906 :             }
     307         6647 :           tree arg = gimple_call_arg (stmt, 0);
     308         6647 :           gcc_assert (arg != NULL_TREE);
     309         6647 :           gcc_assert (TREE_CODE (arg) == SSA_NAME);
     310         6647 :           simduid_to_vf *p = NULL, data;
     311         6647 :           data.simduid = DECL_UID (SSA_NAME_VAR (arg));
     312              :           /* Need to nullify loop safelen field since its value is not
     313              :              valid after transformation.  */
     314         6647 :           if (bb->loop_father && bb->loop_father->safelen > 0)
     315         2169 :             bb->loop_father->safelen = 0;
     316         6647 :           if (htab)
     317              :             {
     318         4742 :               p = htab->find (&data);
     319         4742 :               if (p)
     320         4701 :                 vf = p->vf;
     321              :             }
     322         6647 :           switch (ifn)
     323              :             {
     324          969 :             case IFN_GOMP_SIMD_VF:
     325          969 :               t = build_int_cst (unsigned_type_node, vf);
     326          969 :               break;
     327         3484 :             case IFN_GOMP_SIMD_LANE:
     328         3484 :               t = build_int_cst (unsigned_type_node, 0);
     329         3484 :               break;
     330         2194 :             case IFN_GOMP_SIMD_LAST_LANE:
     331         2194 :               t = gimple_call_arg (stmt, 1);
     332         2194 :               break;
     333              :             default:
     334              :               gcc_unreachable ();
     335              :             }
     336         6647 :           tree lhs = gimple_call_lhs (stmt);
     337         6647 :           if (lhs)
     338         6590 :             replace_uses_by (lhs, t);
     339         6647 :           release_defs (stmt);
     340         6647 :           gsi_remove (&i, true);
     341              :         }
     342              :     }
     343         7809 : }
     344              : 
     345              : /* Helper structure for note_simd_array_uses.  */
     346              : 
     347              : struct note_simd_array_uses_struct
     348              : {
     349              :   hash_table<simd_array_to_simduid> **htab;
     350              :   unsigned int simduid;
     351              : };
     352              : 
     353              : /* Callback for note_simd_array_uses, called through walk_gimple_op.  */
     354              : 
     355              : static tree
     356        65131 : note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
     357              : {
     358        65131 :   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
     359        65131 :   struct note_simd_array_uses_struct *ns
     360              :     = (struct note_simd_array_uses_struct *) wi->info;
     361              : 
     362        65131 :   if (TYPE_P (*tp))
     363            0 :     *walk_subtrees = 0;
     364        65131 :   else if (VAR_P (*tp)
     365        12360 :            && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
     366        77491 :            && DECL_CONTEXT (*tp) == current_function_decl)
     367              :     {
     368        12360 :       simd_array_to_simduid data;
     369        12360 :       if (!*ns->htab)
     370         2208 :         *ns->htab = new hash_table<simd_array_to_simduid> (15);
     371        12360 :       data.decl = *tp;
     372        12360 :       data.simduid = ns->simduid;
     373        12360 :       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
     374        12360 :       if (*slot == NULL)
     375              :         {
     376         5501 :           simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
     377         5501 :           *p = data;
     378         5501 :           *slot = p;
     379              :         }
     380         6859 :       else if ((*slot)->simduid != ns->simduid)
     381            0 :         (*slot)->simduid = -1U;
     382        12360 :       *walk_subtrees = 0;
     383              :     }
     384        65131 :   return NULL_TREE;
     385              : }
     386              : 
     387              : /* Find "omp simd array" temporaries and map them to corresponding
     388              :    simduid.  */
     389              : 
     390              : static void
     391         7809 : note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
     392              : {
     393         7809 :   basic_block bb;
     394         7809 :   gimple_stmt_iterator gsi;
     395         7809 :   struct walk_stmt_info wi;
     396         7809 :   struct note_simd_array_uses_struct ns;
     397              : 
     398         7809 :   memset (&wi, 0, sizeof (wi));
     399         7809 :   wi.info = &ns;
     400         7809 :   ns.htab = htab;
     401              : 
     402       102470 :   FOR_EACH_BB_FN (bb, fun)
     403       583066 :     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     404              :       {
     405       393744 :         gimple *stmt = gsi_stmt (gsi);
     406       393744 :         if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
     407       387160 :           continue;
     408         7509 :         switch (gimple_call_internal_fn (stmt))
     409              :           {
     410         6620 :           case IFN_GOMP_SIMD_LANE:
     411         6620 :           case IFN_GOMP_SIMD_VF:
     412         6620 :           case IFN_GOMP_SIMD_LAST_LANE:
     413         6620 :             break;
     414          889 :           default:
     415          889 :             continue;
     416              :           }
     417         6620 :         tree lhs = gimple_call_lhs (stmt);
     418         6620 :         if (lhs == NULL_TREE)
     419           36 :           continue;
     420         6584 :         imm_use_iterator use_iter;
     421         6584 :         gimple *use_stmt;
     422         6584 :         ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
     423        31198 :         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
     424        18030 :           if (!is_gimple_debug (use_stmt))
     425        24522 :             walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
     426              :       }
     427         7809 : }
     428              : 
     429              : /* Shrink arrays with "omp simd array" attribute to the corresponding
     430              :    vectorization factor.  */
     431              : 
     432              : static void
     433         2208 : shrink_simd_arrays
     434              :   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
     435              :    hash_table<simduid_to_vf> *simduid_to_vf_htab)
     436              : {
     437         7709 :   for (hash_table<simd_array_to_simduid>::iterator iter
     438         2208 :          = simd_array_to_simduid_htab->begin ();
     439        13210 :        iter != simd_array_to_simduid_htab->end (); ++iter)
     440         5501 :     if ((*iter)->simduid != -1U)
     441              :       {
     442         5501 :         tree decl = (*iter)->decl;
     443         5501 :         poly_uint64 vf = 1;
     444         5501 :         if (simduid_to_vf_htab)
     445              :           {
     446         4574 :             simduid_to_vf *p = NULL, data;
     447         4574 :             data.simduid = (*iter)->simduid;
     448         4574 :             p = simduid_to_vf_htab->find (&data);
     449         4574 :             if (p)
     450         4540 :               vf = p->vf;
     451              :           }
     452         5501 :         tree atype
     453         5501 :           = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
     454         5501 :         TREE_TYPE (decl) = atype;
     455         5501 :         relayout_decl (decl);
     456              :       }
     457              : 
     458         2208 :   delete simd_array_to_simduid_htab;
     459         2208 : }
     460              : 
     461              : /* Initialize the vec_info with kind KIND_IN and the shared vectorizer
     462              :    state SHARED_.  */
     463              : 
     464      2763761 : vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
     465      2763761 :   : kind (kind_in),
     466      2763761 :     shared (shared_),
     467      2763761 :     stmt_vec_info_ro (false),
     468      2763761 :     bbs (NULL),
     469      2763761 :     nbbs (0),
     470      2763761 :     inv_pattern_def_seq (NULL)
     471              : {
     472      2763761 :   stmt_vec_infos.create (50);
     473      2763761 : }
     474              : 
     475      2763761 : vec_info::~vec_info ()
     476              : {
     477      4963107 :   for (slp_instance &instance : slp_instances)
     478      1113502 :     vect_free_slp_instance (instance);
     479              : 
     480      2763761 :   free_stmt_vec_infos ();
     481      2763761 : }
     482              : 
     483      2336857 : vec_info_shared::vec_info_shared ()
     484      2336857 :   : datarefs (vNULL),
     485      2336857 :     datarefs_copy (vNULL),
     486      2336857 :     ddrs (vNULL)
     487              : {
     488      2336857 : }
     489              : 
     490      2336857 : vec_info_shared::~vec_info_shared ()
     491              : {
     492      2336857 :   free_data_refs (datarefs);
     493      2336857 :   free_dependence_relations (ddrs);
     494      2336857 :   datarefs_copy.release ();
     495      2336857 : }
     496              : 
     497              : void
     498      2107125 : vec_info_shared::save_datarefs ()
     499              : {
     500      2107125 :   if (!flag_checking)
     501              :     return;
     502      3161549 :   datarefs_copy.reserve_exact (datarefs.length ());
     503     12725637 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     504     10618529 :     datarefs_copy.quick_push (*datarefs[i]);
     505              : }
     506              : 
     507              : void
     508       882306 : vec_info_shared::check_datarefs ()
     509              : {
     510       882306 :   if (!flag_checking)
     511              :     return;
     512      2643806 :   gcc_assert (datarefs.length () == datarefs_copy.length ());
     513     12197366 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     514     11315060 :     if (memcmp (&datarefs_copy[i], datarefs[i],
     515              :                 offsetof (data_reference, alt_indices)) != 0)
     516            0 :       gcc_unreachable ();
     517              : }
     518              : 
     519              : /* Record that STMT belongs to the vectorizable region.  Create and return
     520              :    an associated stmt_vec_info.  */
     521              : 
     522              : stmt_vec_info
     523     61479081 : vec_info::add_stmt (gimple *stmt)
     524              : {
     525     61479081 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     526     61479081 :   set_vinfo_for_stmt (stmt, res);
     527     61479081 :   return res;
     528              : }
     529              : 
     530              : /* Record that STMT belongs to the vectorizable region.  Create a new
     531              :    stmt_vec_info, mark STMT_INFO as being related to it, and return the
     532              :    new stmt_vec_info.  */
     533              : 
     534              : stmt_vec_info
     535         1171 : vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
     536              : {
     537         1171 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     538         1171 :   res->pattern_stmt_p = true;
     539         1171 :   set_vinfo_for_stmt (stmt, res, false);
     540         1171 :   STMT_VINFO_RELATED_STMT (res) = stmt_info;
     541         1171 :   return res;
     542              : }
     543              : 
     544              : /* If STMT was previously associated with a stmt_vec_info and STMT now resides
     545              :    at a different address than before (e.g., because STMT is a phi node that has
     546              :    been resized), update the stored address to match the new one.  It is not
     547              :    possible to use lookup_stmt () to perform this task, because that function
     548              :    returns NULL if the stored stmt pointer does not match the one being looked
     549              :    up.  */
     550              : 
     551              : stmt_vec_info
     552        10856 : vec_info::resync_stmt_addr (gimple *stmt)
     553              : {
     554        10856 :   unsigned int uid = gimple_uid (stmt);
     555        10856 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     556              :     {
     557        10856 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
     558        10856 :       if (res && res->stmt)
     559              :         {
     560        10856 :           res->stmt = stmt;
     561        10856 :           return res;
     562              :         }
     563              :     }
     564              :   return nullptr;
     565              : }
     566              : 
     567              : /* If STMT has an associated stmt_vec_info, return that stmt_vec_info,
     568              :    otherwise return null.  It is safe to call this function on any statement,
     569              :    even if it might not be part of the vectorizable region.  */
     570              : 
     571              : stmt_vec_info
     572    475123195 : vec_info::lookup_stmt (gimple *stmt)
     573              : {
     574    475123195 :   unsigned int uid = gimple_uid (stmt);
     575    475123195 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     576              :     {
     577    300345882 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
     578    300345882 :       if (res && res->stmt == stmt)
     579    299992768 :         return res;
     580              :     }
     581              :   return NULL;
     582              : }
     583              : 
     584              : /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
     585              :    return that stmt_vec_info, otherwise return null.  It is safe to call
     586              :    this on arbitrary operands.  */
     587              : 
     588              : stmt_vec_info
     589     56176798 : vec_info::lookup_def (tree name)
     590              : {
     591     56176798 :   if (TREE_CODE (name) == SSA_NAME
     592     56176798 :       && !SSA_NAME_IS_DEFAULT_DEF (name))
     593     50939781 :     return lookup_stmt (SSA_NAME_DEF_STMT (name));
     594              :   return NULL;
     595              : }
     596              : 
     597              : /* See whether there is a single non-debug statement that uses LHS and
     598              :    whether that statement has an associated stmt_vec_info.  Return the
     599              :    stmt_vec_info if so, otherwise return null.  */
     600              : 
     601              : stmt_vec_info
     602         1914 : vec_info::lookup_single_use (tree lhs)
     603              : {
     604         1914 :   use_operand_p dummy;
     605         1914 :   gimple *use_stmt;
     606         1914 :   if (single_imm_use (lhs, &dummy, &use_stmt))
     607         1765 :     return lookup_stmt (use_stmt);
     608              :   return NULL;
     609              : }
     610              : 
     611              : /* Return vectorization information about DR.  */
     612              : 
     613              : dr_vec_info *
     614     49865160 : vec_info::lookup_dr (data_reference *dr)
     615              : {
     616     49865160 :   stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
     617              :   /* DR_STMT should never refer to a stmt in a pattern replacement.  */
     618     49865160 :   gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
     619     49865160 :   return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
     620              : }
     621              : 
     622              : /* Record that NEW_STMT_INFO now implements the same data reference
     623              :    as OLD_STMT_INFO.  */
     624              : 
     625              : void
     626         6385 : vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
     627              : {
     628         6385 :   gcc_assert (!is_pattern_stmt_p (old_stmt_info));
     629         6385 :   STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
     630         6385 :   new_stmt_info->dr_aux = old_stmt_info->dr_aux;
     631         6385 :   STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
     632         6385 :     = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
     633         6385 :   STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
     634         6385 :     = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
     635         6385 :   STMT_VINFO_STRIDED_P (new_stmt_info)
     636         6385 :     = STMT_VINFO_STRIDED_P (old_stmt_info);
     637         6385 :   STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
     638         6385 :     = STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
     639         6385 : }
     640              : 
     641              : /* Permanently remove the statement described by STMT_INFO from the
     642              :    function.  */
     643              : 
     644              : void
     645      1489586 : vec_info::remove_stmt (stmt_vec_info stmt_info)
     646              : {
     647      1489586 :   gcc_assert (!stmt_info->pattern_stmt_p);
     648      1489586 :   set_vinfo_for_stmt (stmt_info->stmt, NULL);
     649      1489586 :   unlink_stmt_vdef (stmt_info->stmt);
     650      1489586 :   gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
     651      1489586 :   gsi_remove (&si, true);
     652      1489586 :   release_defs (stmt_info->stmt);
     653      1489586 :   free_stmt_vec_info (stmt_info);
     654      1489586 : }
     655              : 
     656              : /* Replace the statement at GSI by NEW_STMT, both the vectorization
     657              :    information and the function itself.  STMT_INFO describes the statement
     658              :    at GSI.  */
     659              : 
     660              : void
     661         5193 : vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
     662              :                         gimple *new_stmt)
     663              : {
     664         5193 :   gimple *old_stmt = stmt_info->stmt;
     665         5193 :   gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
     666         5193 :   gimple_set_uid (new_stmt, gimple_uid (old_stmt));
     667         5193 :   stmt_info->stmt = new_stmt;
     668         5193 :   gsi_replace (gsi, new_stmt, true);
     669         5193 : }
     670              : 
     671              : /* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
     672              :    not NULL it specifies whether to use the sub-region entry
     673              :    determined by it, currently used for loop vectorization to insert
     674              :    on the inner loop entry vs. the outer loop entry.  */
     675              : 
     676              : void
     677       100872 : vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
     678              : {
     679       100872 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
     680              :     {
     681        19154 :       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     682        19154 :       basic_block new_bb;
     683        19154 :       edge pe;
     684              : 
     685        19154 :       if (context && nested_in_vect_loop_p (loop, context))
     686              :         loop = loop->inner;
     687              : 
     688        19154 :       pe = loop_preheader_edge (loop);
     689        19154 :       new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     690        19154 :       gcc_assert (!new_bb);
     691              :     }
     692              :   else
     693              :     {
     694        81718 :       gimple_stmt_iterator gsi_region_begin
     695        81718 :         = gsi_after_labels (bbs[0]);
     696        81718 :       gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
     697              :     }
     698       100872 : }
     699              : 
     700              : /* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */
     701              : 
     702              : void
     703         3305 : vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
     704              : {
     705         3305 :   gimple_seq seq = NULL;
     706         3305 :   gimple_stmt_iterator gsi = gsi_start (seq);
     707         3305 :   gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
     708         3305 :   insert_seq_on_entry (context, seq);
     709         3305 : }
     710              : 
     711              : /* Create and initialize a new stmt_vec_info struct for STMT.  */
     712              : 
     713              : stmt_vec_info
     714     61480252 : vec_info::new_stmt_vec_info (gimple *stmt)
     715              : {
     716     61480252 :   stmt_vec_info res = XCNEW (class _stmt_vec_info);
     717     61480252 :   res->stmt = stmt;
     718              : 
     719     61480252 :   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
     720     61480252 :   STMT_VINFO_VECTORIZABLE (res) = true;
     721     61480252 :   STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
     722     61480252 :   STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
     723     61480252 :   STMT_VINFO_REDUC_IDX (res) = -1;
     724     61480252 :   STMT_VINFO_REDUC_DEF (res) = NULL;
     725     61480252 :   STMT_VINFO_SLP_VECT_ONLY (res) = false;
     726              : 
     727     61480252 :   if (is_a <loop_vec_info> (this)
     728      8552402 :       && gimple_code (stmt) == GIMPLE_PHI
     729     62996058 :       && is_loop_header_bb_p (gimple_bb (stmt)))
     730      1505126 :     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
     731              :   else
     732     59975126 :     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
     733              : 
     734     61480252 :   STMT_SLP_TYPE (res) = not_vect;
     735              : 
     736              :   /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
     737     61480252 :   res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
     738              : 
     739     61480252 :   return res;
     740              : }
     741              : 
     742              : /* Associate STMT with INFO.  */
     743              : 
     744              : void
     745     62969838 : vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
     746              : {
     747     62969838 :   unsigned int uid = gimple_uid (stmt);
     748     62969838 :   if (uid == 0)
     749              :     {
     750     61480252 :       gcc_assert (!check_ro || !stmt_vec_info_ro);
     751     61480252 :       gcc_checking_assert (info);
     752     61480252 :       uid = stmt_vec_infos.length () + 1;
     753     61480252 :       gimple_set_uid (stmt, uid);
     754     61480252 :       stmt_vec_infos.safe_push (info);
     755              :     }
     756              :   else
     757              :     {
     758      1489586 :       gcc_checking_assert (info == NULL);
     759      1489586 :       stmt_vec_infos[uid - 1] = info;
     760              :     }
     761     62969838 : }
     762              : 
     763              : /* Free the contents of stmt_vec_infos.  */
     764              : 
     765              : void
     766      2763761 : vec_info::free_stmt_vec_infos (void)
     767              : {
     768     69771535 :   for (stmt_vec_info &info : stmt_vec_infos)
     769     61480252 :     if (info != NULL)
     770     59990666 :       free_stmt_vec_info (info);
     771      2763761 :   stmt_vec_infos.release ();
     772      2763761 : }
     773              : 
     774              : /* Free STMT_INFO.  */
     775              : 
     776              : void
     777     61480252 : vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
     778              : {
     779     61480252 :   if (stmt_info->pattern_stmt_p)
     780              :     {
     781      2372118 :       gimple_set_bb (stmt_info->stmt, NULL);
     782      2372118 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
     783      2372118 :       if (lhs && TREE_CODE (lhs) == SSA_NAME)
     784      2008464 :         release_ssa_name (lhs);
     785              :     }
     786              : 
     787     61480252 :   free (stmt_info);
     788     61480252 : }
     789              : 
     790              : /* Returns true if S1 dominates S2.  */
     791              : 
     792              : bool
     793       549136 : vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
     794              : {
     795       549136 :   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
     796              : 
     797              :   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     798              :      SSA_NAME.  Assume it lives at the beginning of function and
     799              :      thus dominates everything.  */
     800       549136 :   if (!bb1 || s1 == s2)
     801              :     return true;
     802              : 
     803              :   /* If bb2 is NULL, S2 cannot be dominated by any stmt with a bb.  */
     804       547113 :   if (!bb2)
     805              :     return false;
     806              : 
     807       547113 :   if (bb1 != bb2)
     808       188747 :     return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
     809              : 
     810              :   /* PHIs in the same basic block are assumed to be
     811              :      executed all in parallel, if only one stmt is a PHI,
     812              :      it dominates the other stmt in the same basic block.  */
     813       358366 :   if (gimple_code (s1) == GIMPLE_PHI)
     814              :     return true;
     815              : 
     816       322646 :   if (gimple_code (s2) == GIMPLE_PHI)
     817              :     return false;
     818              : 
     819              :   /* Inserted vectorized stmts all have UID 0 while the original stmts
     820              :      in the IL have UID increasing within a BB.  Walk from both sides
     821              :      until we find the other stmt or a stmt with UID != 0.  */
     822       305014 :   gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
     823       775064 :   while (gimple_uid (gsi_stmt (gsi1)) == 0)
     824              :     {
     825       587222 :       gsi_next (&gsi1);
     826       587222 :       if (gsi_end_p (gsi1))
     827              :         return false;
     828       585506 :       if (gsi_stmt (gsi1) == s2)
     829              :         return true;
     830              :     }
     831       187842 :   if (gimple_uid (gsi_stmt (gsi1)) == -1u)
     832              :     return false;
     833              : 
     834       187842 :   gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
     835       786120 :   while (gimple_uid (gsi_stmt (gsi2)) == 0)
     836              :     {
     837       610301 :       gsi_prev (&gsi2);
     838       610301 :       if (gsi_end_p (gsi2))
     839              :         return false;
     840       598286 :       if (gsi_stmt (gsi2) == s1)
     841              :         return true;
     842              :     }
     843       175819 :   if (gimple_uid (gsi_stmt (gsi2)) == -1u)
     844              :     return false;
     845              : 
     846       175819 :   if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
     847              :     return true;
     848              :   return false;
     849              : }
     850              : 
     851              : /* A helper function to free scev and LOOP niter information, as well as
     852              :    clear loop constraint LOOP_C_FINITE.  */
     853              : 
     854              : void
     855        43998 : vect_free_loop_info_assumptions (class loop *loop)
     856              : {
     857        43998 :   scev_reset_htab ();
     858              :   /* We need to explicitly reset upper bound information since they are
     859              :      used even after free_numbers_of_iterations_estimates.  */
     860        43998 :   loop->any_upper_bound = false;
     861        43998 :   loop->any_likely_upper_bound = false;
     862        43998 :   free_numbers_of_iterations_estimates (loop);
     863        43998 :   loop_constraint_clear (loop, LOOP_C_FINITE);
     864        43998 : }
     865              : 
     866              : /* If LOOP has been versioned during ifcvt, return the internal call
     867              :    guarding it.  */
     868              : 
     869              : gimple *
     870       517653 : vect_loop_vectorized_call (class loop *loop, gcond **cond)
     871              : {
     872       517653 :   basic_block bb = loop_preheader_edge (loop)->src;
     873       956644 :   gimple *g;
     874      1395635 :   do
     875              :     {
     876       956644 :       g = *gsi_last_bb (bb);
     877       607076 :       if ((g && gimple_code (g) == GIMPLE_COND)
     878      2051927 :           || !single_succ_p (bb))
     879              :         break;
     880       577630 :       if (!single_pred_p (bb))
     881              :         break;
     882       438991 :       bb = single_pred (bb);
     883              :     }
     884              :   while (1);
     885       517653 :   if (g && gimple_code (g) == GIMPLE_COND)
     886              :     {
     887       372699 :       if (cond)
     888            0 :         *cond = as_a <gcond *> (g);
     889       372699 :       gimple_stmt_iterator gsi = gsi_for_stmt (g);
     890       372699 :       gsi_prev (&gsi);
     891       372699 :       if (!gsi_end_p (gsi))
     892              :         {
     893       341536 :           g = gsi_stmt (gsi);
     894       341536 :           if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
     895       341536 :               && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
     896        30010 :                   || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
     897        60295 :             return g;
     898              :         }
     899              :     }
     900              :   return NULL;
     901              : }
     902              : 
     903              : /* If LOOP has been versioned during loop distribution, return the guarding
     904              :    internal call.  */
     905              : 
     906              : static gimple *
     907       481048 : vect_loop_dist_alias_call (class loop *loop, function *fun)
     908              : {
     909       481048 :   basic_block bb;
     910       481048 :   basic_block entry;
     911       481048 :   class loop *outer, *orig;
     912              : 
     913       481048 :   if (loop->orig_loop_num == 0)
     914              :     return NULL;
     915              : 
     916          162 :   orig = get_loop (fun, loop->orig_loop_num);
     917          162 :   if (orig == NULL)
     918              :     {
     919              :       /* The original loop is somehow destroyed.  Clear the information.  */
     920            0 :       loop->orig_loop_num = 0;
     921            0 :       return NULL;
     922              :     }
     923              : 
     924          162 :   if (loop != orig)
     925           91 :     bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
     926              :   else
     927           71 :     bb = loop_preheader_edge (loop)->src;
     928              : 
     929          162 :   outer = bb->loop_father;
     930          162 :   entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
     931              : 
     932              :   /* Look upward in dominance tree.  */
     933          743 :   for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
     934          581 :        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
     935              :     {
     936          691 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
     937          691 :       if (!safe_is_a <gcond *> (*gsi))
     938          581 :         continue;
     939              : 
     940          508 :       gsi_prev (&gsi);
     941          508 :       if (gsi_end_p (gsi))
     942            8 :         continue;
     943              : 
     944          500 :       gimple *g = gsi_stmt (gsi);
     945              :       /* The guarding internal function call must have the same distribution
     946              :          alias id.  */
     947          500 :       if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
     948          500 :           && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
     949       481048 :         return g;
     950              :     }
     951              :   return NULL;
     952              : }
     953              : 
     954              : /* Reset to zero the uids of all statements in the basic blocks of the scalar
     955              :    loop named by LOOP_VECTORIZED_CALL (recorded in LOOP_VINFO), the internal
     956              :    call guarding the loop which has been if converted.  */
     957              : static void
     958         7780 : set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
     959              :                   function *fun)
     960              : {
     961         7780 :   tree arg = gimple_call_arg (loop_vectorized_call, 1);
     962         7780 :   basic_block *bbs;
     963         7780 :   unsigned int i;
     964         7780 :   class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
     965              : 
     966         7780 :   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
     967         7780 :   LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo)
     968         7780 :     = vec_init_loop_exit_info (scalar_loop);
     969         7780 :   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
     970              :                        == loop_vectorized_call);
     971              :   /* If we are going to vectorize outer loop, prevent vectorization
     972              :      of the inner loop in the scalar loop - either the scalar loop is
     973              :      thrown away, so it is a wasted work, or is used only for
     974              :      a few iterations.  */
     975         7780 :   if (scalar_loop->inner)
     976              :     {
     977          120 :       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
     978          120 :       if (g)
     979              :         {
     980          120 :           arg = gimple_call_arg (g, 0);
     981          120 :           get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
     982          120 :           fold_loop_internal_call (g, boolean_false_node);
     983              :         }
     984              :     }
     985         7780 :   bbs = get_loop_body (scalar_loop);
     986        42427 :   for (i = 0; i < scalar_loop->num_nodes; i++)
     987              :     {
     988        34647 :       basic_block bb = bbs[i];
     989        34647 :       gimple_stmt_iterator gsi;
     990        66597 :       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     991              :         {
     992        31950 :           gimple *phi = gsi_stmt (gsi);
     993        31950 :           gimple_set_uid (phi, 0);
     994              :         }
     995       175231 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     996              :         {
     997       105937 :           gimple *stmt = gsi_stmt (gsi);
     998       105937 :           gimple_set_uid (stmt, 0);
     999              :         }
    1000              :     }
    1001         7780 :   free (bbs);
    1002         7780 : }
    1003              : 
    1004              : /* Generate vectorized code for LOOP and its epilogues.  */
    1005              : 
    1006              : static unsigned
    1007        61412 : vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1008              :                       loop_p loop, gimple *loop_vectorized_call,
    1009              :                       function *fun)
    1010              : {
    1011        61412 :   loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
    1012              : 
    1013        61412 :   if (loop_vectorized_call)
    1014         7780 :     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
    1015              : 
    1016        61412 :   unsigned HOST_WIDE_INT bytes;
    1017        61412 :   if (dump_enabled_p ())
    1018              :     {
    1019        21930 :       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
    1020        10965 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1021              :                          "%sloop vectorized using %s%wu byte vectors and"
    1022              :                          " unroll factor %u\n",
    1023        10965 :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1024              :                          ? "epilogue " : "",
    1025        10965 :                          LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    1026              :                          ? "masked " : "", bytes,
    1027              :                          (unsigned int) LOOP_VINFO_VECT_FACTOR
    1028        10965 :                                                  (loop_vinfo).to_constant ());
    1029              :       else
    1030              :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1031              :                          "%sloop vectorized using variable length vectors\n",
    1032              :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1033              :                          ? "epilogue " : "");
    1034              :     }
    1035              : 
    1036        61412 :   loop_p new_loop = vect_transform_loop (loop_vinfo,
    1037              :                                          loop_vectorized_call);
    1038              :   /* Now that the loop has been vectorized, allow it to be unrolled
    1039              :      etc.  */
    1040        61412 :   loop->force_vectorize = false;
    1041              : 
    1042        61412 :   if (loop->simduid)
    1043              :     {
    1044         1895 :       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
    1045         1895 :       if (!simduid_to_vf_htab)
    1046         1535 :         simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
    1047         1895 :       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
    1048         1895 :       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
    1049         1895 :       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
    1050         1895 :           = simduid_to_vf_data;
    1051              :     }
    1052              : 
    1053              :   /* We should not have to update virtual SSA form here but some
    1054              :      transforms involve creating new virtual definitions which makes
    1055              :      updating difficult.
    1056              :      We delay the actual update to the end of the pass but avoid
    1057              :      confusing ourselves by forcing need_ssa_update_p () to false.  */
    1058        61412 :   unsigned todo = 0;
    1059        61412 :   if (need_ssa_update_p (cfun))
    1060              :     {
    1061          119 :       gcc_assert (loop_vinfo->any_known_not_updated_vssa);
    1062          119 :       fun->gimple_df->ssa_renaming_needed = false;
    1063          119 :       todo |= TODO_update_ssa_only_virtuals;
    1064              :     }
    1065        61412 :   gcc_assert (!need_ssa_update_p (cfun));
    1066              : 
    1067              :   /* Epilogue of vectorized loop must be vectorized too.  */
    1068        61412 :   if (new_loop)
    1069         6824 :     todo |= vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
    1070              : 
    1071        61412 :   return todo;
    1072              : }
    1073              : 
    1074              : /* Try to vectorize LOOP.  */
    1075              : 
    1076              : static unsigned
    1077       464011 : try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1078              :                       unsigned *num_vectorized_loops, loop_p loop,
    1079              :                       gimple *loop_vectorized_call,
    1080              :                       gimple *loop_dist_alias_call,
    1081              :                       function *fun)
    1082              : {
    1083       464011 :   unsigned ret = 0;
    1084       464011 :   vec_info_shared shared;
    1085       464011 :   auto_purge_vect_location sentinel;
    1086       464011 :   vect_location = find_loop_location (loop);
    1087              : 
    1088       464011 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    1089       464011 :       && dump_enabled_p ())
    1090        14989 :     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
    1091              :                  "\nAnalyzing loop at %s:%d\n",
    1092        14989 :                  LOCATION_FILE (vect_location.get_location_t ()),
    1093        29978 :                  LOCATION_LINE (vect_location.get_location_t ()));
    1094              : 
    1095              :   /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
    1096       464011 :   opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, loop_vectorized_call,
    1097              :                                                     &shared);
    1098       464011 :   loop->aux = loop_vinfo;
    1099              : 
    1100       464011 :   if (!loop_vinfo)
    1101       409419 :     if (dump_enabled_p ())
    1102         5789 :       if (opt_problem *problem = loop_vinfo.get_problem ())
    1103              :         {
    1104         5789 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1105              :                            "couldn't vectorize loop\n");
    1106         5789 :           problem->emit_and_clear ();
    1107              :         }
    1108              : 
    1109       464011 :   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    1110              :     {
    1111              :       /* Free existing information if loop is analyzed with some
    1112              :          assumptions.  */
    1113       409419 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1114         7876 :         vect_free_loop_info_assumptions (loop);
    1115              : 
    1116              :       /* If we applied if-conversion then try to vectorize the
    1117              :          BB of innermost loops.
    1118              :          ???  Ideally BB vectorization would learn to vectorize
    1119              :          control flow by applying if-conversion on-the-fly, the
    1120              :          following retains the if-converted loop body even when
    1121              :          only non-if-converted parts took part in BB vectorization.  */
    1122       409419 :       if (flag_tree_slp_vectorize != 0
    1123       408393 :           && loop_vectorized_call
    1124        20841 :           && ! loop->inner)
    1125              :         {
    1126        20040 :           basic_block bb = loop->header;
    1127        20040 :           bool require_loop_vectorize = false;
    1128        40080 :           for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
    1129       688497 :                !gsi_end_p (gsi); gsi_next (&gsi))
    1130              :             {
    1131       669278 :               gimple *stmt = gsi_stmt (gsi);
    1132       669278 :               gcall *call = dyn_cast <gcall *> (stmt);
    1133         1243 :               if (call && gimple_call_internal_p (call))
    1134              :                 {
    1135         1146 :                   internal_fn ifn = gimple_call_internal_fn (call);
    1136         1146 :                   if (ifn == IFN_MASK_LOAD
    1137         1146 :                       || ifn == IFN_MASK_STORE
    1138          728 :                       || ifn == IFN_MASK_CALL
    1139              :                       /* Don't keep the if-converted parts when the ifn with
    1140              :                          specific type is not supported by the backend.  */
    1141         1855 :                       || (direct_internal_fn_p (ifn)
    1142          384 :                           && !direct_internal_fn_supported_p
    1143          384 :                           (call, OPTIMIZE_FOR_SPEED)))
    1144              :                     {
    1145              :                       require_loop_vectorize = true;
    1146              :                       break;
    1147              :                     }
    1148              :                 }
    1149       668457 :               gimple_set_uid (stmt, -1);
    1150       668457 :               gimple_set_visited (stmt, false);
    1151              :             }
    1152        20040 :           if (!require_loop_vectorize)
    1153              :             {
    1154        19219 :               tree arg = gimple_call_arg (loop_vectorized_call, 1);
    1155        19219 :               class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
    1156        19219 :               if (vect_slp_if_converted_bb (bb, scalar_loop))
    1157              :                 {
    1158           75 :                   fold_loop_internal_call (loop_vectorized_call,
    1159              :                                            boolean_true_node);
    1160           75 :                   loop_vectorized_call = NULL;
    1161           75 :                   ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    1162              :                 }
    1163              :             }
    1164              :         }
    1165              :       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
    1166              :          loop, don't vectorize its inner loop; we'll attempt to
    1167              :          vectorize LOOP_VECTORIZED guarded inner loop of the scalar
    1168              :          loop version.  */
    1169        21867 :       if (loop_vectorized_call && loop->inner)
    1170          803 :         loop->inner->dont_vectorize = true;
    1171       409419 :       return ret;
    1172              :     }
    1173              : 
    1174        54592 :   if (!dbg_cnt (vect_loop))
    1175              :     {
    1176              :       /* Free existing information if loop is analyzed with some
    1177              :          assumptions.  */
    1178            4 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1179            0 :         vect_free_loop_info_assumptions (loop);
    1180            4 :       return ret;
    1181              :     }
    1182              : 
    1183        54588 :   (*num_vectorized_loops)++;
    1184              :   /* Transform LOOP and its epilogues.  */
    1185        54588 :   ret |= vect_transform_loops (simduid_to_vf_htab, loop,
    1186              :                                loop_vectorized_call, fun);
    1187              : 
    1188        54588 :   if (loop_vectorized_call)
    1189              :     {
    1190         7780 :       fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
    1191         7780 :       ret |= TODO_cleanup_cfg;
    1192              :     }
    1193        54588 :   if (loop_dist_alias_call)
    1194              :     {
    1195            8 :       tree value = gimple_call_arg (loop_dist_alias_call, 1);
    1196            8 :       fold_loop_internal_call (loop_dist_alias_call, value);
    1197            8 :       ret |= TODO_cleanup_cfg;
    1198              :     }
    1199              : 
    1200              :   return ret;
    1201       464011 : }
    1202              : 
    1203              : /* Try to vectorize LOOP.  */
    1204              : 
    1205              : static unsigned
    1206       495356 : try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1207              :                     unsigned *num_vectorized_loops, loop_p loop,
    1208              :                     function *fun)
    1209              : {
    1210       495356 :   if (!((flag_tree_loop_vectorize
    1211       490774 :          && optimize_loop_nest_for_speed_p (loop))
    1212        33148 :         || loop->force_vectorize))
    1213              :     return 0;
    1214              : 
    1215       464011 :   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
    1216              :                                vect_loop_vectorized_call (loop),
    1217       464011 :                                vect_loop_dist_alias_call (loop, fun), fun);
    1218              : }
    1219              : 
    1220              : 
    1221              : /* Loop autovectorization.  */
    1222              : 
    1223              : namespace {
    1224              : 
    1225              : const pass_data pass_data_vectorize =
    1226              : {
    1227              :   GIMPLE_PASS, /* type */
    1228              :   "vect", /* name */
    1229              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1230              :   TV_TREE_VECTORIZATION, /* tv_id */
    1231              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1232              :   0, /* properties_provided */
    1233              :   0, /* properties_destroyed */
    1234              :   0, /* todo_flags_start */
    1235              :   0, /* todo_flags_finish */
    1236              : };
    1237              : 
    1238              : class pass_vectorize : public gimple_opt_pass
    1239              : {
    1240              : public:
    1241       288047 :   pass_vectorize (gcc::context *ctxt)
    1242       576094 :     : gimple_opt_pass (pass_data_vectorize, ctxt)
    1243              :   {}
    1244              : 
    1245              :   /* opt_pass methods: */
    1246       240015 :   bool gate (function *fun) final override
    1247              :     {
    1248       240015 :       return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    1249              :     }
    1250              : 
    1251              :   unsigned int execute (function *) final override;
    1252              : 
    1253              : }; // class pass_vectorize
    1254              : 
    1255              : /* Function pass_vectorize::execute.
    1256              : 
    1257              :    Entry point to loop vectorization phase.  */
    1258              : 
    1259              : unsigned
    1260       206946 : pass_vectorize::execute (function *fun)
    1261              : {
    1262       206946 :   unsigned int i;
    1263       206946 :   unsigned int num_vectorized_loops = 0;
    1264       206946 :   unsigned int vect_loops_num;
    1265       206946 :   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
    1266       206946 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1267       206946 :   bool any_ifcvt_loops = false;
    1268       206946 :   unsigned ret = 0;
    1269              : 
    1270       206946 :   vect_loops_num = number_of_loops (fun);
    1271              : 
    1272              :   /* Bail out if there are no loops.  */
    1273       206946 :   if (vect_loops_num <= 1)
    1274              :     return 0;
    1275              : 
    1276       206946 :   vect_slp_init ();
    1277              : 
    1278       206946 :   if (fun->has_simduid_loops)
    1279         5601 :     note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1280              : 
    1281              :   /*  ----------- Analyze loops. -----------  */
    1282       206946 :   enable_ranger (fun);
    1283              : 
    1284              :   /* If some loop was duplicated, it gets a bigger number
    1285              :      than all previously defined loops.  This fact allows us to run
    1286              :      only over the initial loops, skipping newly generated ones.  */
    1287      1150800 :   for (auto loop : loops_list (fun, 0))
    1288       529962 :     if (loop->dont_vectorize)
    1289              :       {
    1290        35529 :         any_ifcvt_loops = true;
    1291              :         /* If-conversion sometimes versions both the outer loop
    1292              :            (for the case when outer loop vectorization might be
    1293              :            desirable) as well as the inner loop in the scalar version
    1294              :            of the loop.  So we have:
    1295              :             if (LOOP_VECTORIZED (1, 3))
    1296              :               {
    1297              :                 loop1
    1298              :                   loop2
    1299              :               }
    1300              :             else
    1301              :               loop3 (copy of loop1)
    1302              :                 if (LOOP_VECTORIZED (4, 5))
    1303              :                   loop4 (copy of loop2)
    1304              :                 else
    1305              :                   loop5 (copy of loop4)
    1306              :            If loops' iteration gives us loop3 first (which has
    1307              :            dont_vectorize set), make sure to process loop1 before loop4;
    1308              :            so that we can prevent vectorization of loop4 if loop1
    1309              :            is successfully vectorized.  */
    1310        35529 :         if (loop->inner)
    1311              :           {
    1312         2168 :             gimple *loop_vectorized_call
    1313         2168 :               = vect_loop_vectorized_call (loop);
    1314         2168 :             if (loop_vectorized_call
    1315         2168 :                 && vect_loop_vectorized_call (loop->inner))
    1316              :               {
    1317          923 :                 tree arg = gimple_call_arg (loop_vectorized_call, 0);
    1318          923 :                 class loop *vector_loop
    1319          923 :                   = get_loop (fun, tree_to_shwi (arg));
    1320          923 :                 if (vector_loop && vector_loop != loop)
    1321              :                   {
    1322              :                     /* Make sure we don't vectorize it twice.  */
    1323          923 :                     vector_loop->dont_vectorize = true;
    1324          923 :                     ret |= try_vectorize_loop (simduid_to_vf_htab,
    1325              :                                                &num_vectorized_loops,
    1326              :                                                vector_loop, fun);
    1327              :                   }
    1328              :               }
    1329              :           }
    1330              :       }
    1331              :     else
    1332       494433 :       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
    1333       206946 :                                  loop, fun);
    1334              : 
    1335       206946 :   vect_location = dump_user_location_t ();
    1336              : 
    1337       206946 :   statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
    1338       206946 :   if (dump_enabled_p ()
    1339       206946 :       || (num_vectorized_loops > 0 && dump_enabled_p ()))
    1340        11624 :     dump_printf_loc (MSG_NOTE, vect_location,
    1341              :                      "vectorized %u loops in function.\n",
    1342              :                      num_vectorized_loops);
    1343              : 
    1344              :   /*  ----------- Finalize. -----------  */
    1345       206946 :   disable_ranger (fun);
    1346              : 
    1347       206946 :   if (any_ifcvt_loops)
    1348       321090 :     for (i = 1; i < number_of_loops (fun); i++)
    1349              :       {
    1350       138919 :         class loop *loop = get_loop (fun, i);
    1351       138919 :         if (loop && loop->dont_vectorize)
    1352              :           {
    1353        38104 :             gimple *g = vect_loop_vectorized_call (loop);
    1354        38104 :             if (g)
    1355              :               {
    1356        21067 :                 fold_loop_internal_call (g, boolean_false_node);
    1357        21067 :                 loop->dont_vectorize = false;
    1358        21067 :                 ret |= TODO_cleanup_cfg;
    1359        21067 :                 g = NULL;
    1360              :               }
    1361              :             else
    1362        17037 :               g = vect_loop_dist_alias_call (loop, fun);
    1363              : 
    1364        38104 :             if (g)
    1365              :               {
    1366           28 :                 fold_loop_internal_call (g, boolean_false_node);
    1367           28 :                 loop->dont_vectorize = false;
    1368           28 :                 ret |= TODO_cleanup_cfg;
    1369              :               }
    1370              :           }
    1371              :       }
    1372              : 
    1373              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1374       206946 :   if (fun->has_simduid_loops)
    1375              :     {
    1376         5601 :       adjust_simduid_builtins (simduid_to_vf_htab, fun);
    1377              :       /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
    1378         5601 :       scev_reset ();
    1379              :     }
    1380              :   /* Shrink any "omp array simd" temporary arrays to the
    1381              :      actual vectorization factors.  */
    1382       206946 :   if (simd_array_to_simduid_htab)
    1383         2204 :     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
    1384       206946 :   delete simduid_to_vf_htab;
    1385       206946 :   fun->has_simduid_loops = false;
    1386              : 
    1387       206946 :   if (num_vectorized_loops > 0)
    1388              :     {
    1389              :       /* We are collecting some corner cases where we need to update
    1390              :          virtual SSA form via the TODO but delete the queued update-SSA
    1391              :          state.  Force renaming if we think that might be necessary.  */
    1392        37010 :       if (ret & TODO_update_ssa_only_virtuals)
    1393           89 :         mark_virtual_operands_for_renaming (cfun);
    1394              :       /* If we vectorized any loop only virtual SSA form needs to be updated.
    1395              :          ???  Also while we try hard to update loop-closed SSA form we fail
    1396              :          to properly do this in some corner-cases (see PR56286).  */
    1397        37010 :       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
    1398        37010 :       ret |= TODO_cleanup_cfg;
    1399              :     }
    1400              : 
    1401      1849488 :   for (i = 1; i < number_of_loops (fun); i++)
    1402              :     {
    1403       717798 :       loop_vec_info loop_vinfo;
    1404       717798 :       bool has_mask_store;
    1405              : 
    1406       717798 :       class loop *loop = get_loop (fun, i);
    1407       717798 :       if (!loop || !loop->aux)
    1408       656382 :         continue;
    1409        61416 :       loop_vinfo = (loop_vec_info) loop->aux;
    1410        61416 :       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
    1411        61416 :       delete loop_vinfo;
    1412        61416 :       if (has_mask_store
    1413        61416 :           && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
    1414          493 :         optimize_mask_stores (loop);
    1415              : 
    1416        61416 :       auto_bitmap exit_bbs;
    1417              :       /* Perform local CSE; this especially helps because we emit code for
    1418              :          predicates that need to be shared for optimal predicate usage.
    1419              :          However, reassoc will re-order them and prevent CSE from working
    1420              :          as it should.  CSE only the loop body, not the entry.  */
    1421        61416 :       auto_vec<edge> exits = get_loop_exit_edges (loop);
    1422       247185 :       for (edge exit : exits)
    1423        62937 :         bitmap_set_bit (exit_bbs, exit->dest->index);
    1424              : 
    1425        61416 :       edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
    1426        61416 :       do_rpo_vn (fun, entry, exit_bbs);
    1427              : 
    1428        61416 :       loop->aux = NULL;
    1429        61416 :     }
    1430              : 
    1431       206946 :   vect_slp_fini ();
    1432              : 
    1433       206946 :   return ret;
    1434              : }
    1435              : 
    1436              : } // anon namespace
    1437              : 
    1438              : gimple_opt_pass *
    1439       288047 : make_pass_vectorize (gcc::context *ctxt)
    1440              : {
    1441       288047 :   return new pass_vectorize (ctxt);
    1442              : }
    1443              : 
    1444              : /* Entry point to the simduid cleanup pass.  */
    1445              : 
    1446              : namespace {
    1447              : 
    1448              : const pass_data pass_data_simduid_cleanup =
    1449              : {
    1450              :   GIMPLE_PASS, /* type */
    1451              :   "simduid", /* name */
    1452              :   OPTGROUP_NONE, /* optinfo_flags */
    1453              :   TV_NONE, /* tv_id */
    1454              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1455              :   0, /* properties_provided */
    1456              :   0, /* properties_destroyed */
    1457              :   0, /* todo_flags_start */
    1458              :   0, /* todo_flags_finish */
    1459              : };
    1460              : 
    1461              : class pass_simduid_cleanup : public gimple_opt_pass
    1462              : {
    1463              : public:
    1464       576094 :   pass_simduid_cleanup (gcc::context *ctxt)
    1465      1152188 :     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
    1466              :   {}
    1467              : 
    1468              :   /* opt_pass methods: */
    1469       288047 :   opt_pass * clone () final override
    1470              :   {
    1471       288047 :     return new pass_simduid_cleanup (m_ctxt);
    1472              :   }
    1473      2514741 :   bool gate (function *fun) final override { return fun->has_simduid_loops; }
    1474              :   unsigned int execute (function *) final override;
    1475              : 
    1476              : }; // class pass_simduid_cleanup
    1477              : 
    1478              : unsigned int
    1479         2208 : pass_simduid_cleanup::execute (function *fun)
    1480              : {
    1481         2208 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1482              : 
    1483         2208 :   note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1484              : 
    1485              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1486         2208 :   adjust_simduid_builtins (NULL, fun);
    1487              : 
    1488              :   /* Shrink any "omp array simd" temporary arrays to the
    1489              :      actual vectorization factors.  */
    1490         2208 :   if (simd_array_to_simduid_htab)
    1491            4 :     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
    1492         2208 :   fun->has_simduid_loops = false;
    1493         2208 :   return 0;
    1494              : }
    1495              : 
    1496              : }  // anon namespace
    1497              : 
    1498              : gimple_opt_pass *
    1499       288047 : make_pass_simduid_cleanup (gcc::context *ctxt)
    1500              : {
    1501       288047 :   return new pass_simduid_cleanup (ctxt);
    1502              : }
    1503              : 
    1504              : 
    1505              : /*  Entry point to basic block SLP phase.  */
    1506              : 
    1507              : namespace {
    1508              : 
    1509              : const pass_data pass_data_slp_vectorize =
    1510              : {
    1511              :   GIMPLE_PASS, /* type */
    1512              :   "slp", /* name */
    1513              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1514              :   TV_TREE_SLP_VECTORIZATION, /* tv_id */
    1515              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1516              :   0, /* properties_provided */
    1517              :   0, /* properties_destroyed */
    1518              :   0, /* todo_flags_start */
    1519              :   TODO_update_ssa, /* todo_flags_finish */
    1520              : };
    1521              : 
    1522              : class pass_slp_vectorize : public gimple_opt_pass
    1523              : {
    1524              : public:
    1525       576094 :   pass_slp_vectorize (gcc::context *ctxt)
    1526      1152188 :     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
    1527              :   {}
    1528              : 
    1529              :   /* opt_pass methods: */
    1530       288047 :   opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
    1531      1039544 :   bool gate (function *) final override { return flag_tree_slp_vectorize != 0; }
    1532              :   unsigned int execute (function *) final override;
    1533              : 
    1534              : }; // class pass_slp_vectorize
    1535              : 
    1536              : unsigned int
    1537       906490 : pass_slp_vectorize::execute (function *fun)
    1538              : {
    1539       906490 :   auto_purge_vect_location sentinel;
    1540       906490 :   basic_block bb;
    1541              : 
    1542       906490 :   bool in_loop_pipeline = scev_initialized_p ();
    1543       906490 :   if (!in_loop_pipeline)
    1544              :     {
    1545       701175 :       loop_optimizer_init (LOOPS_NORMAL);
    1546       701175 :       scev_initialize ();
    1547              :     }
    1548              : 
    1549              :   /* Mark all stmts as not belonging to the current region and unvisited.  */
    1550     11439017 :   FOR_EACH_BB_FN (bb, fun)
    1551              :     {
    1552     15210910 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    1553      4678383 :            gsi_next (&gsi))
    1554              :         {
    1555      4678383 :           gphi *stmt = gsi.phi ();
    1556      4678383 :           gimple_set_uid (stmt, -1);
    1557      4678383 :           gimple_set_visited (stmt, false);
    1558              :         }
    1559     99112504 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    1560     78047450 :            gsi_next (&gsi))
    1561              :         {
    1562     78047450 :           gimple *stmt = gsi_stmt (gsi);
    1563     78047450 :           gimple_set_uid (stmt, -1);
    1564     78047450 :           gimple_set_visited (stmt, false);
    1565              :         }
    1566              :     }
    1567              : 
    1568       906490 :   vect_slp_init ();
    1569              : 
    1570       906490 :   vect_slp_function (fun);
    1571              : 
    1572       906490 :   vect_slp_fini ();
    1573              : 
    1574       906490 :   if (!in_loop_pipeline)
    1575              :     {
    1576       701175 :       scev_finalize ();
    1577       701175 :       loop_optimizer_finalize ();
    1578              :     }
    1579              : 
    1580      1812980 :   return 0;
    1581       906490 : }
    1582              : 
    1583              : } // anon namespace
    1584              : 
    1585              : gimple_opt_pass *
    1586       288047 : make_pass_slp_vectorize (gcc::context *ctxt)
    1587              : {
    1588       288047 :   return new pass_slp_vectorize (ctxt);
    1589              : }
    1590              : 
    1591              : 
    1592              : /* Increase alignment of global arrays to improve vectorization potential.
    1593              :    TODO:
    1594              :    - Consider also structs that have an array field.
    1595              :    - Use ipa analysis to prune arrays that can't be vectorized?
    1596              :      This should involve global alignment analysis and in the future also
    1597              :      array padding.  */
    1598              : 
    1599              : static unsigned get_vec_alignment_for_type (tree);
    1600              : static hash_map<tree, unsigned> *type_align_map;
    1601              : 
    1602              : /* Return alignment of the vector type for array TYPE's scalar element type.
    1603              :    0 if no such vector type exists or TYPE may be smaller than the vector.  */
    1604              : static unsigned
    1605            0 : get_vec_alignment_for_array_type (tree type)
    1606              : {
    1607            0 :   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
    1608            0 :   poly_uint64 array_size, vector_size;
    1609              : 
    1610            0 :   tree scalar_type = strip_array_types (type);
    1611            0 :   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
    1612            0 :   if (!vectype
    1613            0 :       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
    1614            0 :       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
    1615            0 :       || maybe_lt (array_size, vector_size))
    1616            0 :     return 0;
    1617              : 
    1618            0 :   return TYPE_ALIGN (vectype);
    1619              : }
    1620              : 
    1621              : /* Return the maximum vector-type alignment found among the fields of TYPE.
    1622              :    For now, we only consider fields whose offset is a multiple of their
    1623              :    vector alignment.
    1624              :    Return 0 if no suitable field is found.  */
    1625              : static unsigned
    1626            0 : get_vec_alignment_for_record_type (tree type)
    1627              : {
    1628            0 :   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
    1629              : 
    1630            0 :   unsigned max_align = 0, alignment;
    1631            0 :   HOST_WIDE_INT offset;
    1632            0 :   tree offset_tree;
    1633              : 
    1634            0 :   if (TYPE_PACKED (type))
    1635              :     return 0;
    1636              : 
    1637            0 :   unsigned *slot = type_align_map->get (type);
    1638            0 :   if (slot)
    1639            0 :     return *slot;
    1640              : 
    1641            0 :   for (tree field = first_field (type);
    1642            0 :        field != NULL_TREE;
    1643            0 :        field = DECL_CHAIN (field))
    1644              :     {
    1645              :       /* Skip if not FIELD_DECL or if alignment is set by user.  */
    1646            0 :       if (TREE_CODE (field) != FIELD_DECL
    1647            0 :           || DECL_USER_ALIGN (field)
    1648            0 :           || DECL_ARTIFICIAL (field))
    1649            0 :         continue;
    1650              : 
    1651              :       /* We don't need to process the type further if offset is variable,
    1652              :          since the offsets of remaining members will also be variable.  */
    1653            0 :       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
    1654            0 :           || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
    1655              :         break;
    1656              : 
    1657              :       /* Similarly stop processing the type if offset_tree
    1658              :          does not fit in unsigned HOST_WIDE_INT.  */
    1659            0 :       offset_tree = bit_position (field);
    1660            0 :       if (!tree_fits_uhwi_p (offset_tree))
    1661              :         break;
    1662              : 
    1663            0 :       offset = tree_to_uhwi (offset_tree);
    1664            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
    1665              : 
    1666              :       /* Get maximum alignment of vectorized field/array among those members
    1667              :          whose offset is multiple of the vector alignment.  */
    1668            0 :       if (alignment
    1669            0 :           && (offset % alignment == 0)
    1670            0 :           && (alignment > max_align))
    1671            0 :         max_align = alignment;
    1672              :     }
    1673              : 
    1674            0 :   type_align_map->put (type, max_align);
    1675            0 :   return max_align;
    1676              : }
    1677              : 
    1678              : /* Return alignment of vector type corresponding to TYPE's scalar type,
    1679              :    or 0 if it doesn't exist or the vector alignment is not greater than
    1680              :    TYPE's alignment.  */
    1681              : static unsigned
    1682            0 : get_vec_alignment_for_type (tree type)
    1683              : {
    1684            0 :   if (type == NULL_TREE)
    1685              :     return 0;
    1686              : 
    1687            0 :   gcc_assert (TYPE_P (type));
    1688              : 
    1689            0 :   static unsigned alignment = 0;
    1690            0 :   switch (TREE_CODE (type))
    1691              :     {
    1692            0 :       case ARRAY_TYPE:
    1693            0 :         alignment = get_vec_alignment_for_array_type (type);
    1694            0 :         break;
    1695            0 :       case RECORD_TYPE:
    1696            0 :         alignment = get_vec_alignment_for_record_type (type);
    1697            0 :         break;
    1698            0 :       default:
    1699            0 :         alignment = 0;
    1700            0 :         break;
    1701              :     }
    1702              : 
    1703            0 :   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
    1704              : }
    1705              : 
    1706              : /* Entry point to increase_alignment pass.  */
    1707              : static unsigned int
    1708            0 : increase_alignment (void)
    1709              : {
    1710            0 :   varpool_node *vnode;
    1711              : 
    1712            0 :   vect_location = dump_user_location_t ();
    1713            0 :   type_align_map = new hash_map<tree, unsigned>;
    1714              : 
    1715              :   /* Increase the alignment of all global arrays for vectorization.  */
    1716            0 :   FOR_EACH_DEFINED_VARIABLE (vnode)
    1717              :     {
    1718            0 :       tree decl = vnode->decl;
    1719            0 :       unsigned int alignment;
    1720              : 
    1721            0 :       if ((decl_in_symtab_p (decl)
    1722            0 :           && !symtab_node::get (decl)->can_increase_alignment_p ())
    1723            0 :           || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
    1724            0 :         continue;
    1725              : 
    1726            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
    1727            0 :       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
    1728              :         {
    1729            0 :           vnode->increase_alignment (alignment);
    1730            0 :           if (dump_enabled_p ())
    1731            0 :             dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
    1732              :         }
    1733              :     }
    1734              : 
    1735            0 :   delete type_align_map;
    1736            0 :   return 0;
    1737              : }
    1738              : 
    1739              : 
    1740              : namespace {
    1741              : 
    1742              : const pass_data pass_data_ipa_increase_alignment =
    1743              : {
    1744              :   SIMPLE_IPA_PASS, /* type */
    1745              :   "increase_alignment", /* name */
    1746              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1747              :   TV_IPA_OPT, /* tv_id */
    1748              :   0, /* properties_required */
    1749              :   0, /* properties_provided */
    1750              :   0, /* properties_destroyed */
    1751              :   0, /* todo_flags_start */
    1752              :   0, /* todo_flags_finish */
    1753              : };
    1754              : 
    1755              : class pass_ipa_increase_alignment : public simple_ipa_opt_pass
    1756              : {
    1757              : public:
    1758       288047 :   pass_ipa_increase_alignment (gcc::context *ctxt)
    1759       576094 :     : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
    1760              :   {}
    1761              : 
    1762              :   /* opt_pass methods: */
    1763       231936 :   bool gate (function *) final override
    1764              :     {
    1765       231936 :       return flag_section_anchors && flag_tree_loop_vectorize;
    1766              :     }
    1767              : 
    1768            0 :   unsigned int execute (function *) final override
    1769              :   {
    1770            0 :     return increase_alignment ();
    1771              :   }
    1772              : 
    1773              : }; // class pass_ipa_increase_alignment
    1774              : 
    1775              : } // anon namespace
    1776              : 
    1777              : simple_ipa_opt_pass *
    1778       288047 : make_pass_ipa_increase_alignment (gcc::context *ctxt)
    1779              : {
    1780       288047 :   return new pass_ipa_increase_alignment (ctxt);
    1781              : }
    1782              : 
    1783              : /* If the condition represented by T is a comparison or the SSA name
    1784              :    result of a comparison, extract the comparison's operands.  Represent
    1785              :    T as NE_EXPR <T, 0> otherwise.  */
    1786              : 
    1787              : void
    1788        63264 : scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
    1789              : {
    1790        63264 :   if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    1791              :     {
    1792            0 :       this->code = TREE_CODE (t);
    1793            0 :       this->op0 = TREE_OPERAND (t, 0);
    1794            0 :       this->op1 = TREE_OPERAND (t, 1);
    1795            0 :       this->inverted_p = false;
    1796            0 :       return;
    1797              :     }
    1798              : 
    1799        63264 :   if (TREE_CODE (t) == SSA_NAME)
    1800        26063 :     if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
    1801              :       {
    1802        26063 :         tree_code code = gimple_assign_rhs_code (stmt);
    1803        26063 :         if (TREE_CODE_CLASS (code) == tcc_comparison)
    1804              :           {
    1805        17666 :             this->code = code;
    1806        17666 :             this->op0 = gimple_assign_rhs1 (stmt);
    1807        17666 :             this->op1 = gimple_assign_rhs2 (stmt);
    1808        17666 :             this->inverted_p = false;
    1809        17666 :             return;
    1810              :           }
    1811         8397 :         else if (code == BIT_NOT_EXPR)
    1812              :           {
    1813         3565 :             tree n_op = gimple_assign_rhs1 (stmt);
    1814         3565 :             if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
    1815              :               {
    1816         3565 :                 code = gimple_assign_rhs_code (stmt);
    1817         3565 :                 if (TREE_CODE_CLASS (code) == tcc_comparison)
    1818              :                   {
    1819         3531 :                     this->code = code;
    1820         3531 :                     this->op0 = gimple_assign_rhs1 (stmt);
    1821         3531 :                     this->op1 = gimple_assign_rhs2 (stmt);
    1822         3531 :                     this->inverted_p = true;
    1823         3531 :                     return;
    1824              :                   }
    1825              :               }
    1826              :           }
    1827              :       }
    1828              : 
    1829        42067 :   this->code = NE_EXPR;
    1830        42067 :   this->op0 = t;
    1831        42067 :   this->op1 = build_zero_cst (TREE_TYPE (t));
    1832        42067 :   this->inverted_p = false;
    1833              : }
    1834              : 
    1835              : /* See the comment above the declaration for details.  */
    1836              : 
    1837              : unsigned int
    1838            0 : vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
    1839              :                              stmt_vec_info stmt_info, slp_tree,
    1840              :                              tree vectype, int misalign,
    1841              :                              vect_cost_model_location where)
    1842              : {
    1843            0 :   unsigned int cost
    1844            0 :     = builtin_vectorization_cost (kind, vectype, misalign) * count;
    1845            0 :   return record_stmt_cost (stmt_info, where, cost);
    1846              : }
    1847              : 
    1848              : /* See the comment above the declaration for details.  */
    1849              : 
    1850              : void
    1851      1801339 : vector_costs::finish_cost (const vector_costs *)
    1852              : {
    1853      1801339 :   gcc_assert (!m_finished);
    1854      1801339 :   m_finished = true;
    1855      1801339 : }
    1856              : 
    1857              : /* Record a base cost of COST units against WHERE.  If STMT_INFO is
    1858              :    nonnull, use it to adjust the cost based on execution frequency
    1859              :    (where appropriate).  */
    1860              : 
    1861              : unsigned int
    1862            0 : vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
    1863              :                                 vect_cost_model_location where,
    1864              :                                 unsigned int cost)
    1865              : {
    1866            0 :   cost = adjust_cost_for_freq (stmt_info, where, cost);
    1867            0 :   m_costs[where] += cost;
    1868            0 :   return cost;
    1869              : }
    1870              : 
    1871              : /* COST is the base cost we have calculated for an operation in location WHERE.
    1872              :    If STMT_INFO is nonnull, use it to adjust the cost based on execution
    1873              :    frequency (where appropriate).  Return the adjusted cost.  */
    1874              : 
    1875              : unsigned int
    1876      7447795 : vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
    1877              :                                     vect_cost_model_location where,
    1878              :                                     unsigned int cost)
    1879              : {
    1880              :   /* Statements in an inner loop relative to the loop being
    1881              :      vectorized are weighted more heavily.  The value here is
    1882              :      arbitrary and could potentially be improved with analysis.  */
    1883      7447795 :   if (where == vect_body
    1884      7447795 :       && stmt_info
    1885      7447795 :       && stmt_in_inner_loop_p (m_vinfo, stmt_info))
    1886              :     {
    1887        11870 :       loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
    1888        11870 :       cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
    1889              :     }
    1890      7447795 :   return cost;
    1891              : }
    1892              : 
    1893              : /* See the comment above the declaration for details.  */
    1894              : 
    1895              : bool
    1896        30777 : vector_costs::better_main_loop_than_p (const vector_costs *other) const
    1897              : {
    1898        30777 :   int diff = compare_inside_loop_cost (other);
    1899        30777 :   if (diff != 0)
    1900        30690 :     return diff < 0;
    1901              : 
    1902              :   /* If there's nothing to choose between the loop bodies, see whether
    1903              :      there's a difference in the prologue and epilogue costs.  */
    1904           87 :   diff = compare_outside_loop_cost (other);
    1905           87 :   if (diff != 0)
    1906           61 :     return diff < 0;
    1907              : 
    1908              :   return false;
    1909              : }
    1910              : 
    1911              : 
    1912              : /* See the comment above the declaration for details.  */
    1913              : 
    1914              : bool
    1915         1439 : vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
    1916              :                                            loop_vec_info main_loop) const
    1917              : {
    1918         1439 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    1919         1439 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    1920              : 
    1921         1439 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    1922         1439 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    1923              : 
    1924         1439 :   poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
    1925         1439 :   unsigned HOST_WIDE_INT main_vf;
    1926         1439 :   unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
    1927              :   /* If we can determine how many iterations are left for the epilogue
    1928              :      loop, that is if both the main loop's vectorization factor and number
    1929              :      of iterations are constant, then we use them to calculate the cost of
    1930              :      the epilogue loop together with a 'likely value' for the epilogue's
    1931              :      vectorization factor.  Otherwise we use the main loop's vectorization
    1932              :      factor and the maximum poly value for the epilogue's.  If the target
    1933              :      has not provided a sensible upper bound, poly vectorization
    1934              :      factors are likely to be favored over constant ones.  */
    1935         1439 :   if (main_poly_vf.is_constant (&main_vf)
    1936         1439 :       && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    1937              :     {
    1938           94 :       unsigned HOST_WIDE_INT niters
    1939           94 :         = LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
    1940           94 :       HOST_WIDE_INT other_likely_vf
    1941           94 :         = estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
    1942           94 :       HOST_WIDE_INT this_likely_vf
    1943           94 :         = estimated_poly_value (this_vf, POLY_VALUE_LIKELY);
    1944              : 
    1945              :       /* If the epilogue is using partial vectors we account for the
    1946              :          partial iteration here too.  */
    1947           94 :       other_factor = niters / other_likely_vf;
    1948           94 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
    1949            0 :           && niters % other_likely_vf != 0)
    1950            0 :         other_factor++;
    1951              : 
    1952           94 :       this_factor = niters / this_likely_vf;
    1953           94 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
    1954            0 :           && niters % this_likely_vf != 0)
    1955            0 :         this_factor++;
    1956              :     }
    1957              :   else
    1958              :     {
    1959         1345 :       unsigned HOST_WIDE_INT main_vf_max
    1960         1345 :         = estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
    1961         1345 :       unsigned HOST_WIDE_INT other_vf_max
    1962         1345 :         = estimated_poly_value (other_vf, POLY_VALUE_MAX);
    1963         1345 :       unsigned HOST_WIDE_INT this_vf_max
    1964         1345 :         = estimated_poly_value (this_vf, POLY_VALUE_MAX);
    1965              : 
    1966         1345 :       other_factor = CEIL (main_vf_max, other_vf_max);
    1967         1345 :       this_factor = CEIL (main_vf_max, this_vf_max);
    1968              : 
    1969              :       /* If the loop is not using partial vectors then it will iterate one
    1970              :          time less than one that does.  It is safe to subtract one here,
    1971              :          because the main loop's vf is always at least 2x bigger than that
    1972              :          of an epilogue.  */
    1973         1345 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
    1974         1332 :         other_factor -= 1;
    1975         1345 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
    1976         1343 :         this_factor -= 1;
    1977              :     }
    1978              : 
    1979              :   /* Compute the costs by multiplying the inside costs with the factor and
    1980              :      add the outside costs for a more complete picture.  The factor is the
    1981              :      amount of times we are expecting to iterate this epilogue.  */
    1982         1439 :   other_cost = other->body_cost () * other_factor;
    1983         1439 :   this_cost = this->body_cost () * this_factor;
    1984         1439 :   other_cost += other->outside_cost ();
    1985         1439 :   this_cost += this->outside_cost ();
    1986         1439 :   return this_cost < other_cost;
    1987              : }
    1988              : 
    1989              : /* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
    1990              :    determine the return value of better_main_loop_than_p by comparing the
    1991              :    inside (loop body) costs of THIS and OTHER.  Return:
    1992              : 
    1993              :    * -1 if better_main_loop_than_p should return true.
    1994              :    * 1 if better_main_loop_than_p should return false.
    1995              :    * 0 if we can't decide.  */
    1996              : 
    1997              : int
    1998        30777 : vector_costs::compare_inside_loop_cost (const vector_costs *other) const
    1999              : {
    2000        30777 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    2001        30777 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    2002              : 
    2003        30777 :   struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
    2004        30777 :   gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);
    2005              : 
    2006        30777 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    2007        30777 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    2008              : 
    2009              :   /* Limit the VFs to what is likely to be the maximum number of iterations,
    2010              :      to handle cases in which at least one loop_vinfo is fully-masked.  */
    2011        30777 :   HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
    2012        30777 :   if (estimated_max_niter != -1)
    2013              :     {
    2014        29703 :       if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
    2015              :           >= estimated_max_niter)
    2016              :         this_vf = estimated_max_niter;
    2017        29703 :       if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
    2018              :           >= estimated_max_niter)
    2019              :         other_vf = estimated_max_niter;
    2020              :     }
    2021              : 
    2022              :   /* Check whether the (fractional) cost per scalar iteration is lower or
    2023              :      higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
    2024        30777 :   poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
    2025        30777 :   poly_int64 rel_other
    2026        30777 :     = other_loop_vinfo->vector_costs->body_cost () * this_vf;
    2027              : 
    2028        30777 :   HOST_WIDE_INT est_rel_this_min
    2029        30777 :     = estimated_poly_value (rel_this, POLY_VALUE_MIN);
    2030        30777 :   HOST_WIDE_INT est_rel_this_max
    2031        30777 :     = estimated_poly_value (rel_this, POLY_VALUE_MAX);
    2032              : 
    2033        30777 :   HOST_WIDE_INT est_rel_other_min
    2034        30777 :     = estimated_poly_value (rel_other, POLY_VALUE_MIN);
    2035        30777 :   HOST_WIDE_INT est_rel_other_max
    2036        30777 :     = estimated_poly_value (rel_other, POLY_VALUE_MAX);
    2037              : 
    2038              :   /* Check first if we can make out an unambiguous total order from the minimum
    2039              :      and maximum estimates.  */
    2040        30777 :   if (est_rel_this_min < est_rel_other_min
    2041              :       && est_rel_this_max < est_rel_other_max)
    2042              :     return -1;
    2043              : 
    2044        29903 :   if (est_rel_other_min < est_rel_this_min
    2045              :       && est_rel_other_max < est_rel_this_max)
    2046        29816 :     return 1;
    2047              : 
    2048              :   /* When other_loop_vinfo uses a variable vectorization factor,
    2049              :      we know that it has a lower cost for at least one runtime VF.
    2050              :      However, we don't know how likely that VF is.
    2051              : 
    2052              :      One option would be to compare the costs for the estimated VFs.
    2053              :      The problem is that that can put too much pressure on the cost
    2054              :      model.  E.g. if the estimated VF is also the lowest possible VF,
    2055              :      and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
    2056              :      for the estimated VF, we'd then choose this_loop_vinfo even
    2057              :      though (a) this_loop_vinfo might not actually be better than
    2058              :      other_loop_vinfo for that VF and (b) it would be significantly
    2059              :      worse at larger VFs.
    2060              : 
    2061              :      Here we go for a hacky compromise: pick this_loop_vinfo if it is
    2062              :      no more expensive than other_loop_vinfo even after doubling the
    2063              :      estimated other_loop_vinfo VF.  For all but trivial loops, this
    2064              :      ensures that we only pick this_loop_vinfo if it is significantly
    2065              :      better than other_loop_vinfo at the estimated VF.  */
    2066              :   if (est_rel_other_min != est_rel_this_min
    2067              :       || est_rel_other_max != est_rel_this_max)
    2068              :     {
    2069              :       HOST_WIDE_INT est_rel_this_likely
    2070              :         = estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
    2071              :       HOST_WIDE_INT est_rel_other_likely
    2072              :         = estimated_poly_value (rel_other, POLY_VALUE_LIKELY);
    2073              : 
    2074              :       return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    2075              :     }
    2076              : 
    2077              :   return 0;
    2078              : }
    2079              : 
    2080              : /* A <=>-style subroutine of better_main_loop_than_p, used when there is
    2081              :    nothing to choose between the inside (loop body) costs of THIS and OTHER.
    2082              :    Check whether we can determine the return value of better_main_loop_than_p
    2083              :    by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
    2084              :    Return:
    2085              : 
    2086              :    * -1 if better_main_loop_than_p should return true.
    2087              :    * 1 if better_main_loop_than_p should return false.
    2088              :    * 0 if we can't decide.  */
    2089              : 
    2090              : int
    2091           87 : vector_costs::compare_outside_loop_cost (const vector_costs *other) const
    2092              : {
    2093           87 :   auto this_outside_cost = this->outside_cost ();
    2094           87 :   auto other_outside_cost = other->outside_cost ();
    2095           87 :   if (this_outside_cost != other_outside_cost)
    2096           61 :     return this_outside_cost < other_outside_cost ? -1 : 1;
    2097              : 
    2098              :   return 0;
    2099              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.