LCOV - code coverage report
Current view: top level - gcc - tree-vectorizer.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 84.0 % 932 783
Test Date: 2026-02-28 14:20:25 Functions: 85.9 % 64 55
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : /* Loop and basic block vectorizer.
      22              : 
      23              :   This file contains drivers for the three vectorizers:
      24              :   (1) loop vectorizer (inter-iteration parallelism),
      25              :   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      26              :       vectorizer)
      27              :   (3) BB vectorizer (out-of-loops), aka SLP
      28              : 
      29              :   The rest of the vectorizer's code is organized as follows:
      30              :   - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
      31              :     used by drivers (1) and (2).
      32              :   - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
      33              :     drivers (1) and (2).
      34              :   - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
      35              :     used by drivers (2) and (3).
      36              :   - tree-vect-stmts.cc - statements analysis and transformation (used by all).
      37              :   - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
      38              :     manipulations (used by all).
      39              :   - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
      40              : 
      41              :   Here's a poor attempt at illustrating that:
      42              : 
      43              :      tree-vectorizer.cc:
      44              :      loop_vect()  loop_aware_slp()  slp_vect()
      45              :           |        /           \          /
      46              :           |       /             \        /
      47              :           tree-vect-loop.cc  tree-vect-slp.cc
      48              :                 | \      \  /      /   |
      49              :                 |  \      \/      /    |
      50              :                 |   \     /\     /     |
      51              :                 |    \   /  \   /      |
      52              :          tree-vect-stmts.cc  tree-vect-data-refs.cc
      53              :                        \      /
      54              :                     tree-vect-patterns.cc
      55              : */
      56              : 
      57              : #include "config.h"
      58              : #include "system.h"
      59              : #include "coretypes.h"
      60              : #include "backend.h"
      61              : #include "tree.h"
      62              : #include "gimple.h"
      63              : #include "predict.h"
      64              : #include "tree-pass.h"
      65              : #include "ssa.h"
      66              : #include "cgraph.h"
      67              : #include "fold-const.h"
      68              : #include "stor-layout.h"
      69              : #include "gimple-iterator.h"
      70              : #include "gimple-walk.h"
      71              : #include "tree-ssa-loop-manip.h"
      72              : #include "tree-ssa-loop-niter.h"
      73              : #include "tree-cfg.h"
      74              : #include "cfgloop.h"
      75              : #include "tree-vectorizer.h"
      76              : #include "tree-ssa-propagate.h"
      77              : #include "dbgcnt.h"
      78              : #include "tree-scalar-evolution.h"
      79              : #include "stringpool.h"
      80              : #include "attribs.h"
      81              : #include "gimple-pretty-print.h"
      82              : #include "opt-problem.h"
      83              : #include "internal-fn.h"
      84              : #include "tree-ssa-sccvn.h"
      85              : #include "tree-into-ssa.h"
      86              : #include "gimple-range.h"
      87              : 
      88              : /* Loop or bb location, with hotness information.  */
      89              : dump_user_location_t vect_location;
      90              : 
      91              : /* auto_purge_vect_location's dtor: reset the vect_location
      92              :    global, to avoid stale location_t values that could reference
      93              :    GC-ed blocks.  */
      94              : 
      95      1379087 : auto_purge_vect_location::~auto_purge_vect_location ()
      96              : {
      97      1379087 :   vect_location = dump_user_location_t ();
      98      1379087 : }
      99              : 
     100              : /* Dump a cost entry according to args to F.  */
     101              : 
     102              : void
     103       214045 : dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
     104              :                 stmt_vec_info stmt_info, slp_tree node, tree,
     105              :                 int misalign, unsigned cost,
     106              :                 enum vect_cost_model_location where)
     107              : {
     108       214045 :   if (stmt_info)
     109              :     {
     110       197094 :       print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
     111       197094 :       fprintf (f, " ");
     112              :     }
     113        16951 :   else if (node)
     114         3339 :     fprintf (f, "node %p ", (void *)node);
     115              :   else
     116        13612 :     fprintf (f, "<unknown> ");
     117       214045 :   fprintf (f, "%d times ", count);
     118       214045 :   const char *ks = "unknown";
     119       214045 :   switch (kind)
     120              :     {
     121        48646 :     case scalar_stmt:
     122        48646 :       ks = "scalar_stmt";
     123        48646 :       break;
     124        36048 :     case scalar_load:
     125        36048 :       ks = "scalar_load";
     126        36048 :       break;
     127        27140 :     case scalar_store:
     128        27140 :       ks = "scalar_store";
     129        27140 :       break;
     130        34384 :     case vector_stmt:
     131        34384 :       ks = "vector_stmt";
     132        34384 :       break;
     133        21736 :     case vector_load:
     134        21736 :       ks = "vector_load";
     135        21736 :       break;
     136            0 :     case vector_gather_load:
     137            0 :       ks = "vector_gather_load";
     138            0 :       break;
     139         7501 :     case unaligned_load:
     140         7501 :       ks = "unaligned_load";
     141         7501 :       break;
     142         4535 :     case unaligned_store:
     143         4535 :       ks = "unaligned_store";
     144         4535 :       break;
     145         8618 :     case vector_store:
     146         8618 :       ks = "vector_store";
     147         8618 :       break;
     148            0 :     case vector_scatter_store:
     149            0 :       ks = "vector_scatter_store";
     150            0 :       break;
     151         3221 :     case vec_to_scalar:
     152         3221 :       ks = "vec_to_scalar";
     153         3221 :       break;
     154         9113 :     case scalar_to_vec:
     155         9113 :       ks = "scalar_to_vec";
     156         9113 :       break;
     157            8 :     case cond_branch_not_taken:
     158            8 :       ks = "cond_branch_not_taken";
     159            8 :       break;
     160          363 :     case cond_branch_taken:
     161          363 :       ks = "cond_branch_taken";
     162          363 :       break;
     163         6320 :     case vec_perm:
     164         6320 :       ks = "vec_perm";
     165         6320 :       break;
     166         5224 :     case vec_promote_demote:
     167         5224 :       ks = "vec_promote_demote";
     168         5224 :       break;
     169         1188 :     case vec_construct:
     170         1188 :       ks = "vec_construct";
     171         1188 :       break;
     172              :     }
     173       214045 :   fprintf (f, "%s ", ks);
     174       214045 :   if (kind == unaligned_load || kind == unaligned_store)
     175        12036 :     fprintf (f, "(misalign %d) ", misalign);
     176       214045 :   fprintf (f, "costs %u ", cost);
     177       214045 :   const char *ws = "unknown";
     178       214045 :   switch (where)
     179              :     {
     180       128191 :     case vect_prologue:
     181       128191 :       ws = "prologue";
     182       128191 :       break;
     183        79125 :     case vect_body:
     184        79125 :       ws = "body";
     185        79125 :       break;
     186         6729 :     case vect_epilogue:
     187         6729 :       ws = "epilogue";
     188         6729 :       break;
     189              :     }
     190       214045 :   fprintf (f, "in %s\n", ws);
     191       214045 : }
     192              : 
     193              : /* For mapping simduid to vectorization factor.  */
     194              : 
     195              : class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
     196              : {
     197              : public:
     198              :   unsigned int simduid;
     199              :   poly_uint64 vf;
     200              : 
     201              :   /* hash_table support.  */
     202              :   static inline hashval_t hash (const simduid_to_vf *);
     203              :   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
     204              : };
     205              : 
     206              : inline hashval_t
     207         8414 : simduid_to_vf::hash (const simduid_to_vf *p)
     208              : {
     209         8414 :   return p->simduid;
     210              : }
     211              : 
     212              : inline int
     213        14768 : simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
     214              : {
     215        14768 :   return p1->simduid == p2->simduid;
     216              : }
     217              : 
     218              : /* This hash maps the OMP simd array to the corresponding simduid used
     219              :    to index into it.  Like thus,
     220              : 
     221              :         _7 = GOMP_SIMD_LANE (simduid.0)
     222              :         ...
     223              :         ...
     224              :         D.1737[_7] = stuff;
     225              : 
     226              : 
     227              :    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
     228              :    simduid.0.  */
     229              : 
     230              : struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
     231              : {
     232              :   tree decl;
     233              :   unsigned int simduid;
     234              : 
     235              :   /* hash_table support.  */
     236              :   static inline hashval_t hash (const simd_array_to_simduid *);
     237              :   static inline int equal (const simd_array_to_simduid *,
     238              :                            const simd_array_to_simduid *);
     239              : };
     240              : 
     241              : inline hashval_t
     242        23846 : simd_array_to_simduid::hash (const simd_array_to_simduid *p)
     243              : {
     244        23846 :   return DECL_UID (p->decl);
     245              : }
     246              : 
     247              : inline int
     248        16545 : simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
     249              :                               const simd_array_to_simduid *p2)
     250              : {
     251        16545 :   return p1->decl == p2->decl;
     252              : }
     253              : 
     254              : /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
     255              :    into their corresponding constants and remove
     256              :    IFN_GOMP_SIMD_ORDERED_{START,END}.  */
     257              : 
     258              : static void
     259         7809 : adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
     260              : {
     261         7809 :   basic_block bb;
     262              : 
     263       118628 :   FOR_EACH_BB_FN (bb, fun)
     264              :     {
     265       110819 :       gimple_stmt_iterator i;
     266              : 
     267       739076 :       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
     268              :         {
     269       517438 :           poly_uint64 vf = 1;
     270       517438 :           enum internal_fn ifn;
     271       517438 :           gimple *stmt = gsi_stmt (i);
     272       517438 :           tree t;
     273       517438 :           if (!is_gimple_call (stmt)
     274       517438 :               || !gimple_call_internal_p (stmt))
     275              :             {
     276       509875 :               gsi_next (&i);
     277       510791 :               continue;
     278              :             }
     279         7563 :           ifn = gimple_call_internal_fn (stmt);
     280         7563 :           switch (ifn)
     281              :             {
     282         6647 :             case IFN_GOMP_SIMD_LANE:
     283         6647 :             case IFN_GOMP_SIMD_VF:
     284         6647 :             case IFN_GOMP_SIMD_LAST_LANE:
     285         6647 :               break;
     286          158 :             case IFN_GOMP_SIMD_ORDERED_START:
     287          158 :             case IFN_GOMP_SIMD_ORDERED_END:
     288          158 :               if (integer_onep (gimple_call_arg (stmt, 0)))
     289              :                 {
     290            5 :                   enum built_in_function bcode
     291              :                     = (ifn == IFN_GOMP_SIMD_ORDERED_START
     292           10 :                        ? BUILT_IN_GOMP_ORDERED_START
     293              :                        : BUILT_IN_GOMP_ORDERED_END);
     294           10 :                   gimple *g
     295           10 :                     = gimple_build_call (builtin_decl_explicit (bcode), 0);
     296           10 :                   gimple_move_vops (g, stmt);
     297           10 :                   gsi_replace (&i, g, true);
     298           10 :                   continue;
     299           10 :                 }
     300          148 :               gsi_remove (&i, true);
     301          148 :               unlink_stmt_vdef (stmt);
     302          148 :               continue;
     303          758 :             default:
     304          758 :               gsi_next (&i);
     305          758 :               continue;
     306          906 :             }
     307         6647 :           tree arg = gimple_call_arg (stmt, 0);
     308         6647 :           gcc_assert (arg != NULL_TREE);
     309         6647 :           gcc_assert (TREE_CODE (arg) == SSA_NAME);
     310         6647 :           simduid_to_vf *p = NULL, data;
     311         6647 :           data.simduid = DECL_UID (SSA_NAME_VAR (arg));
     312              :           /* Need to nullify loop safelen field since it's value is not
     313              :              valid after transformation.  */
     314         6647 :           if (bb->loop_father && bb->loop_father->safelen > 0)
     315         2169 :             bb->loop_father->safelen = 0;
     316         6647 :           if (htab)
     317              :             {
     318         4742 :               p = htab->find (&data);
     319         4742 :               if (p)
     320         4701 :                 vf = p->vf;
     321              :             }
     322         6647 :           switch (ifn)
     323              :             {
     324          969 :             case IFN_GOMP_SIMD_VF:
     325          969 :               t = build_int_cst (unsigned_type_node, vf);
     326          969 :               break;
     327         3484 :             case IFN_GOMP_SIMD_LANE:
     328         3484 :               t = build_int_cst (unsigned_type_node, 0);
     329         3484 :               break;
     330         2194 :             case IFN_GOMP_SIMD_LAST_LANE:
     331         2194 :               t = gimple_call_arg (stmt, 1);
     332         2194 :               break;
     333              :             default:
     334              :               gcc_unreachable ();
     335              :             }
     336         6647 :           tree lhs = gimple_call_lhs (stmt);
     337         6647 :           if (lhs)
     338         6590 :             replace_uses_by (lhs, t);
     339         6647 :           release_defs (stmt);
     340         6647 :           gsi_remove (&i, true);
     341              :         }
     342              :     }
     343         7809 : }
     344              : 
     345              : /* Helper structure for note_simd_array_uses.  */
     346              : 
     347              : struct note_simd_array_uses_struct
     348              : {
     349              :   hash_table<simd_array_to_simduid> **htab;
     350              :   unsigned int simduid;
     351              : };
     352              : 
     353              : /* Callback for note_simd_array_uses, called through walk_gimple_op.  */
     354              : 
     355              : static tree
     356        65131 : note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
     357              : {
     358        65131 :   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
     359        65131 :   struct note_simd_array_uses_struct *ns
     360              :     = (struct note_simd_array_uses_struct *) wi->info;
     361              : 
     362        65131 :   if (TYPE_P (*tp))
     363            0 :     *walk_subtrees = 0;
     364        65131 :   else if (VAR_P (*tp)
     365        12360 :            && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
     366        77491 :            && DECL_CONTEXT (*tp) == current_function_decl)
     367              :     {
     368        12360 :       simd_array_to_simduid data;
     369        12360 :       if (!*ns->htab)
     370         2208 :         *ns->htab = new hash_table<simd_array_to_simduid> (15);
     371        12360 :       data.decl = *tp;
     372        12360 :       data.simduid = ns->simduid;
     373        12360 :       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
     374        12360 :       if (*slot == NULL)
     375              :         {
     376         5501 :           simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
     377         5501 :           *p = data;
     378         5501 :           *slot = p;
     379              :         }
     380         6859 :       else if ((*slot)->simduid != ns->simduid)
     381            0 :         (*slot)->simduid = -1U;
     382        12360 :       *walk_subtrees = 0;
     383              :     }
     384        65131 :   return NULL_TREE;
     385              : }
     386              : 
     387              : /* Find "omp simd array" temporaries and map them to corresponding
     388              :    simduid.  */
     389              : 
     390              : static void
     391         7809 : note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
     392              : {
     393         7809 :   basic_block bb;
     394         7809 :   gimple_stmt_iterator gsi;
     395         7809 :   struct walk_stmt_info wi;
     396         7809 :   struct note_simd_array_uses_struct ns;
     397              : 
     398         7809 :   memset (&wi, 0, sizeof (wi));
     399         7809 :   wi.info = &ns;
     400         7809 :   ns.htab = htab;
     401              : 
     402       102471 :   FOR_EACH_BB_FN (bb, fun)
     403       582996 :     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     404              :       {
     405       393672 :         gimple *stmt = gsi_stmt (gsi);
     406       393672 :         if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
     407       387088 :           continue;
     408         7509 :         switch (gimple_call_internal_fn (stmt))
     409              :           {
     410         6620 :           case IFN_GOMP_SIMD_LANE:
     411         6620 :           case IFN_GOMP_SIMD_VF:
     412         6620 :           case IFN_GOMP_SIMD_LAST_LANE:
     413         6620 :             break;
     414          889 :           default:
     415          889 :             continue;
     416              :           }
     417         6620 :         tree lhs = gimple_call_lhs (stmt);
     418         6620 :         if (lhs == NULL_TREE)
     419           36 :           continue;
     420         6584 :         imm_use_iterator use_iter;
     421         6584 :         gimple *use_stmt;
     422         6584 :         ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
     423        31195 :         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
     424        18027 :           if (!is_gimple_debug (use_stmt))
     425        24522 :             walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
     426              :       }
     427         7809 : }
     428              : 
     429              : /* Shrink arrays with "omp simd array" attribute to the corresponding
     430              :    vectorization factor.  */
     431              : 
     432              : static void
     433         2208 : shrink_simd_arrays
     434              :   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
     435              :    hash_table<simduid_to_vf> *simduid_to_vf_htab)
     436              : {
     437         7709 :   for (hash_table<simd_array_to_simduid>::iterator iter
     438         2208 :          = simd_array_to_simduid_htab->begin ();
     439        13210 :        iter != simd_array_to_simduid_htab->end (); ++iter)
     440         5501 :     if ((*iter)->simduid != -1U)
     441              :       {
     442         5501 :         tree decl = (*iter)->decl;
     443         5501 :         poly_uint64 vf = 1;
     444         5501 :         if (simduid_to_vf_htab)
     445              :           {
     446         4574 :             simduid_to_vf *p = NULL, data;
     447         4574 :             data.simduid = (*iter)->simduid;
     448         4574 :             p = simduid_to_vf_htab->find (&data);
     449         4574 :             if (p)
     450         4540 :               vf = p->vf;
     451              :           }
     452         5501 :         tree atype
     453         5501 :           = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
     454         5501 :         TREE_TYPE (decl) = atype;
     455         5501 :         relayout_decl (decl);
     456              :       }
     457              : 
     458         2208 :   delete simd_array_to_simduid_htab;
     459         2208 : }
     460              : 
     461              : /* Initialize the vec_info with kind KIND_IN and target cost data
     462              :    TARGET_COST_DATA_IN.  */
     463              : 
     464      2697078 : vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
     465      2697078 :   : kind (kind_in),
     466      2697078 :     shared (shared_),
     467      2697078 :     stmt_vec_info_ro (false),
     468      2697078 :     bbs (NULL),
     469      2697078 :     nbbs (0),
     470      2697078 :     inv_pattern_def_seq (NULL)
     471              : {
     472      2697078 :   stmt_vec_infos.create (50);
     473      2697078 : }
     474              : 
     475      2697078 : vec_info::~vec_info ()
     476              : {
     477      4687672 :   for (slp_instance &instance : slp_instances)
     478      1027344 :     vect_free_slp_instance (instance);
     479              : 
     480      2697078 :   free_stmt_vec_infos ();
     481      2697078 : }
     482              : 
     483      2348666 : vec_info_shared::vec_info_shared ()
     484      2348666 :   : datarefs (vNULL),
     485      2348666 :     datarefs_copy (vNULL),
     486      2348666 :     ddrs (vNULL)
     487              : {
     488      2348666 : }
     489              : 
     490      2348666 : vec_info_shared::~vec_info_shared ()
     491              : {
     492      2348666 :   free_data_refs (datarefs);
     493      2348666 :   free_dependence_relations (ddrs);
     494      2348666 :   datarefs_copy.release ();
     495      2348666 : }
     496              : 
     497              : void
     498      2115011 : vec_info_shared::save_datarefs ()
     499              : {
     500      2115011 :   if (!flag_checking)
     501              :     return;
     502      3171744 :   datarefs_copy.reserve_exact (datarefs.length ());
     503     12783438 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     504     10668444 :     datarefs_copy.quick_push (*datarefs[i]);
     505              : }
     506              : 
     507              : void
     508       805721 : vec_info_shared::check_datarefs ()
     509              : {
     510       805721 :   if (!flag_checking)
     511              :     return;
     512      2414101 :   gcc_assert (datarefs.length () == datarefs_copy.length ());
     513     11956606 :   for (unsigned i = 0; i < datarefs.length (); ++i)
     514     11150885 :     if (memcmp (&datarefs_copy[i], datarefs[i],
     515              :                 offsetof (data_reference, alt_indices)) != 0)
     516            0 :       gcc_unreachable ();
     517              : }
     518              : 
     519              : /* Record that STMT belongs to the vectorizable region.  Create and return
     520              :    an associated stmt_vec_info.  */
     521              : 
     522              : stmt_vec_info
     523     60966745 : vec_info::add_stmt (gimple *stmt)
     524              : {
     525     60966745 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     526     60966745 :   set_vinfo_for_stmt (stmt, res);
     527     60966745 :   return res;
     528              : }
     529              : 
     530              : /* Record that STMT belongs to the vectorizable region.  Create a new
     531              :    stmt_vec_info and mark VECINFO as being related and return the new
     532              :    stmt_vec_info.  */
     533              : 
     534              : stmt_vec_info
     535         1081 : vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
     536              : {
     537         1081 :   stmt_vec_info res = new_stmt_vec_info (stmt);
     538         1081 :   res->pattern_stmt_p = true;
     539         1081 :   set_vinfo_for_stmt (stmt, res, false);
     540         1081 :   STMT_VINFO_RELATED_STMT (res) = stmt_info;
     541         1081 :   return res;
     542              : }
     543              : 
     544              : /* If STMT was previously associated with a stmt_vec_info and STMT now resides
     545              :    at a different address than before (e.g., because STMT is a phi node that has
     546              :    been resized), update the stored address to match the new one.  It is not
     547              :    possible to use lookup_stmt () to perform this task, because that function
     548              :    returns NULL if the stored stmt pointer does not match the one being looked
     549              :    up.  */
     550              : 
     551              : stmt_vec_info
     552        10876 : vec_info::resync_stmt_addr (gimple *stmt)
     553              : {
     554        10876 :   unsigned int uid = gimple_uid (stmt);
     555        10876 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     556              :     {
     557        10876 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
     558        10876 :       if (res && res->stmt)
     559              :         {
     560        10876 :           res->stmt = stmt;
     561        10876 :           return res;
     562              :         }
     563              :     }
     564              :   return nullptr;
     565              : }
     566              : 
     567              : /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
     568              :    return null.  It is safe to call this function on any statement, even if
     569              :    it might not be part of the vectorizable region.  */
     570              : 
     571              : stmt_vec_info
     572    466709285 : vec_info::lookup_stmt (gimple *stmt)
     573              : {
     574    466709285 :   unsigned int uid = gimple_uid (stmt);
     575    466709285 :   if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
     576              :     {
     577    291132812 :       stmt_vec_info res = stmt_vec_infos[uid - 1];
     578    291132812 :       if (res && res->stmt == stmt)
     579    290805594 :         return res;
     580              :     }
     581              :   return NULL;
     582              : }
     583              : 
     584              : /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
     585              :    return that stmt_vec_info, otherwise return null.  It is safe to call
     586              :    this on arbitrary operands.  */
     587              : 
     588              : stmt_vec_info
     589     49135217 : vec_info::lookup_def (tree name)
     590              : {
     591     49135217 :   if (TREE_CODE (name) == SSA_NAME
     592     49135217 :       && !SSA_NAME_IS_DEFAULT_DEF (name))
     593     46928194 :     return lookup_stmt (SSA_NAME_DEF_STMT (name));
     594              :   return NULL;
     595              : }
     596              : 
     597              : /* See whether there is a single non-debug statement that uses LHS and
     598              :    whether that statement has an associated stmt_vec_info.  Return the
     599              :    stmt_vec_info if so, otherwise return null.  */
     600              : 
     601              : stmt_vec_info
     602         1991 : vec_info::lookup_single_use (tree lhs)
     603              : {
     604         1991 :   use_operand_p dummy;
     605         1991 :   gimple *use_stmt;
     606         1991 :   if (single_imm_use (lhs, &dummy, &use_stmt))
     607         1843 :     return lookup_stmt (use_stmt);
     608              :   return NULL;
     609              : }
     610              : 
     611              : /* Return vectorization information about DR.  */
     612              : 
     613              : dr_vec_info *
     614     47359842 : vec_info::lookup_dr (data_reference *dr)
     615              : {
     616     47359842 :   stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
     617              :   /* DR_STMT should never refer to a stmt in a pattern replacement.  */
     618     47359842 :   gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
     619     47359842 :   return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
     620              : }
     621              : 
     622              : /* Record that NEW_STMT_INFO now implements the same data reference
     623              :    as OLD_STMT_INFO.  */
     624              : 
     625              : void
     626         6130 : vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
     627              : {
     628         6130 :   gcc_assert (!is_pattern_stmt_p (old_stmt_info));
     629         6130 :   STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
     630         6130 :   new_stmt_info->dr_aux = old_stmt_info->dr_aux;
     631         6130 :   STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
     632         6130 :     = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
     633         6130 :   STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
     634         6130 :     = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
     635         6130 :   STMT_VINFO_STRIDED_P (new_stmt_info)
     636         6130 :     = STMT_VINFO_STRIDED_P (old_stmt_info);
     637         6130 :   STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
     638         6130 :     = STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
     639         6130 : }
     640              : 
     641              : /* Permanently remove the statement described by STMT_INFO from the
     642              :    function.  */
     643              : 
     644              : void
     645      1481702 : vec_info::remove_stmt (stmt_vec_info stmt_info)
     646              : {
     647      1481702 :   gcc_assert (!stmt_info->pattern_stmt_p);
     648      1481702 :   set_vinfo_for_stmt (stmt_info->stmt, NULL);
     649      1481702 :   unlink_stmt_vdef (stmt_info->stmt);
     650      1481702 :   gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
     651      1481702 :   gsi_remove (&si, true);
     652      1481702 :   release_defs (stmt_info->stmt);
     653      1481702 :   free_stmt_vec_info (stmt_info);
     654      1481702 : }
     655              : 
     656              : /* Replace the statement at GSI by NEW_STMT, both the vectorization
     657              :    information and the function itself.  STMT_INFO describes the statement
     658              :    at GSI.  */
     659              : 
     660              : void
     661         5193 : vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
     662              :                         gimple *new_stmt)
     663              : {
     664         5193 :   gimple *old_stmt = stmt_info->stmt;
     665         5193 :   gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
     666         5193 :   gimple_set_uid (new_stmt, gimple_uid (old_stmt));
     667         5193 :   stmt_info->stmt = new_stmt;
     668         5193 :   gsi_replace (gsi, new_stmt, true);
     669         5193 : }
     670              : 
     671              : /* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
     672              :    not NULL it specifies whether to use the sub-region entry
     673              :    determined by it, currently used for loop vectorization to insert
     674              :    on the inner loop entry vs. the outer loop entry.  */
     675              : 
     676              : void
     677       101510 : vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
     678              : {
     679       101510 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
     680              :     {
     681        19134 :       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     682        19134 :       basic_block new_bb;
     683        19134 :       edge pe;
     684              : 
     685        19134 :       if (context && nested_in_vect_loop_p (loop, context))
     686              :         loop = loop->inner;
     687              : 
     688        19134 :       pe = loop_preheader_edge (loop);
     689        19134 :       new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     690        19134 :       gcc_assert (!new_bb);
     691              :     }
     692              :   else
     693              :     {
     694        82376 :       gimple_stmt_iterator gsi_region_begin
     695        82376 :         = gsi_after_labels (bbs[0]);
     696        82376 :       gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
     697              :     }
     698       101510 : }
     699              : 
     700              : /* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */
     701              : 
     702              : void
     703         3323 : vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
     704              : {
     705         3323 :   gimple_seq seq = NULL;
     706         3323 :   gimple_stmt_iterator gsi = gsi_start (seq);
     707         3323 :   gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
     708         3323 :   insert_seq_on_entry (context, seq);
     709         3323 : }
     710              : 
     711              : /* Create and initialize a new stmt_vec_info struct for STMT.  */
     712              : 
     713              : stmt_vec_info
     714     60967826 : vec_info::new_stmt_vec_info (gimple *stmt)
     715              : {
     716     60967826 :   stmt_vec_info res = XCNEW (class _stmt_vec_info);
     717     60967826 :   res->stmt = stmt;
     718              : 
     719     60967826 :   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
     720     60967826 :   STMT_VINFO_VECTORIZABLE (res) = true;
     721     60967826 :   STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
     722     60967826 :   STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
     723     60967826 :   STMT_VINFO_REDUC_IDX (res) = -1;
     724     60967826 :   STMT_VINFO_REDUC_DEF (res) = NULL;
     725     60967826 :   STMT_VINFO_SLP_VECT_ONLY (res) = false;
     726     60967826 :   STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
     727              : 
     728     60967826 :   if (is_a <loop_vec_info> (this)
     729      7452589 :       && gimple_code (stmt) == GIMPLE_PHI
     730     62262505 :       && is_loop_header_bb_p (gimple_bb (stmt)))
     731      1284366 :     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
     732              :   else
     733     59683460 :     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
     734              : 
     735     60967826 :   STMT_SLP_TYPE (res) = not_vect;
     736              : 
     737              :   /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
     738     60967826 :   res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
     739              : 
     740     60967826 :   return res;
     741              : }
     742              : 
     743              : /* Associate STMT with INFO.  */
     744              : 
     745              : void
     746     62449528 : vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
     747              : {
     748     62449528 :   unsigned int uid = gimple_uid (stmt);
     749     62449528 :   if (uid == 0)
     750              :     {
     751     60967826 :       gcc_assert (!check_ro || !stmt_vec_info_ro);
     752     60967826 :       gcc_checking_assert (info);
     753     60967826 :       uid = stmt_vec_infos.length () + 1;
     754     60967826 :       gimple_set_uid (stmt, uid);
     755     60967826 :       stmt_vec_infos.safe_push (info);
     756              :     }
     757              :   else
     758              :     {
     759      1481702 :       gcc_checking_assert (info == NULL);
     760      1481702 :       stmt_vec_infos[uid - 1] = info;
     761              :     }
     762     62449528 : }
     763              : 
     764              : /* Free the contents of stmt_vec_infos.  */
     765              : 
     766              : void
     767      2697078 : vec_info::free_stmt_vec_infos (void)
     768              : {
     769     69059060 :   for (stmt_vec_info &info : stmt_vec_infos)
     770     60967826 :     if (info != NULL)
     771     59486124 :       free_stmt_vec_info (info);
     772      2697078 :   stmt_vec_infos.release ();
     773      2697078 : }
     774              : 
     775              : /* Free STMT_INFO.  */
     776              : 
     777              : void
     778     60967826 : vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
     779              : {
     780     60967826 :   if (stmt_info->pattern_stmt_p)
     781              :     {
     782      2312539 :       gimple_set_bb (stmt_info->stmt, NULL);
     783      2312539 :       tree lhs = gimple_get_lhs (stmt_info->stmt);
     784      2312539 :       if (lhs && TREE_CODE (lhs) == SSA_NAME)
     785      1948688 :         release_ssa_name (lhs);
     786              :     }
     787              : 
     788     60967826 :   free (stmt_info);
     789     60967826 : }
     790              : 
     791              : /* Returns true if S1 dominates S2.  */
     792              : 
     793              : bool
     794       586716 : vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
     795              : {
     796       586716 :   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
     797              : 
     798              :   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     799              :      SSA_NAME.  Assume it lives at the beginning of function and
     800              :      thus dominates everything.  */
     801       586716 :   if (!bb1 || s1 == s2)
     802              :     return true;
     803              : 
     804              :   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
     805       584801 :   if (!bb2)
     806              :     return false;
     807              : 
     808       584801 :   if (bb1 != bb2)
     809       215466 :     return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
     810              : 
     811              :   /* PHIs in the same basic block are assumed to be
     812              :      executed all in parallel, if only one stmt is a PHI,
     813              :      it dominates the other stmt in the same basic block.  */
     814       369335 :   if (gimple_code (s1) == GIMPLE_PHI)
     815              :     return true;
     816              : 
     817       332900 :   if (gimple_code (s2) == GIMPLE_PHI)
     818              :     return false;
     819              : 
     820              :   /* Inserted vectorized stmts all have UID 0 while the original stmts
     821              :      in the IL have UID increasing within a BB.  Walk from both sides
     822              :      until we find the other stmt or a stmt with UID != 0.  */
     823       315145 :   gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
     824       799530 :   while (gimple_uid (gsi_stmt (gsi1)) == 0)
     825              :     {
     826       601108 :       gsi_next (&gsi1);
     827       601108 :       if (gsi_end_p (gsi1))
     828              :         return false;
     829       599367 :       if (gsi_stmt (gsi1) == s2)
     830              :         return true;
     831              :     }
     832       198422 :   if (gimple_uid (gsi_stmt (gsi1)) == -1u)
     833              :     return false;
     834              : 
     835       198422 :   gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
     836       804478 :   while (gimple_uid (gsi_stmt (gsi2)) == 0)
     837              :     {
     838       617995 :       gsi_prev (&gsi2);
     839       617995 :       if (gsi_end_p (gsi2))
     840              :         return false;
     841       606076 :       if (gsi_stmt (gsi2) == s1)
     842              :         return true;
     843              :     }
     844       186483 :   if (gimple_uid (gsi_stmt (gsi2)) == -1u)
     845              :     return false;
     846              : 
     847       186483 :   if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
     848              :     return true;
     849              :   return false;
     850              : }
     851              : 
     852              : /* A helper function to free scev and LOOP niter information, as well as
     853              :    clear loop constraint LOOP_C_FINITE.  */
     854              : 
     855              : void
     856        58042 : vect_free_loop_info_assumptions (class loop *loop)
     857              : {
     858        58042 :   scev_reset_htab ();
     859              :   /* We need to explicitly reset upper bound information since they are
     860              :      used even after free_numbers_of_iterations_estimates.  */
     861        58042 :   loop->any_upper_bound = false;
     862        58042 :   loop->any_likely_upper_bound = false;
     863        58042 :   free_numbers_of_iterations_estimates (loop);
     864        58042 :   loop_constraint_clear (loop, LOOP_C_FINITE);
     865        58042 : }
     866              : 
     867              : /* If LOOP has been versioned during ifcvt, return the internal call
     868              :    guarding it.  */
     869              : 
     870              : gimple *
     871       523177 : vect_loop_vectorized_call (class loop *loop, gcond **cond)
     872              : {
     873       523177 :   basic_block bb = loop_preheader_edge (loop)->src;
     874       967239 :   gimple *g;
     875      1411301 :   do
     876              :     {
     877       967239 :       g = *gsi_last_bb (bb);
     878       613020 :       if ((g && gimple_code (g) == GIMPLE_COND)
     879      2073338 :           || !single_succ_p (bb))
     880              :         break;
     881       582922 :       if (!single_pred_p (bb))
     882              :         break;
     883       444062 :       bb = single_pred (bb);
     884              :     }
     885              :   while (1);
     886       523177 :   if (g && gimple_code (g) == GIMPLE_COND)
     887              :     {
     888       377740 :       if (cond)
     889            0 :         *cond = as_a <gcond *> (g);
     890       377740 :       gimple_stmt_iterator gsi = gsi_for_stmt (g);
     891       377740 :       gsi_prev (&gsi);
     892       377740 :       if (!gsi_end_p (gsi))
     893              :         {
     894       346687 :           g = gsi_stmt (gsi);
     895       346687 :           if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
     896       346687 :               && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
     897        30140 :                   || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
     898        60625 :             return g;
     899              :         }
     900              :     }
     901              :   return NULL;
     902              : }
     903              : 
     904              : /* If LOOP has been versioned during loop distribution, return the gurading
     905              :    internal call.  */
     906              : 
     907              : static gimple *
     908       486396 : vect_loop_dist_alias_call (class loop *loop, function *fun)
     909              : {
     910       486396 :   basic_block bb;
     911       486396 :   basic_block entry;
     912       486396 :   class loop *outer, *orig;
     913              : 
     914       486396 :   if (loop->orig_loop_num == 0)
     915              :     return NULL;
     916              : 
     917          162 :   orig = get_loop (fun, loop->orig_loop_num);
     918          162 :   if (orig == NULL)
     919              :     {
     920              :       /* The original loop is somehow destroyed.  Clear the information.  */
     921            0 :       loop->orig_loop_num = 0;
     922            0 :       return NULL;
     923              :     }
     924              : 
     925          162 :   if (loop != orig)
     926           91 :     bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
     927              :   else
     928           71 :     bb = loop_preheader_edge (loop)->src;
     929              : 
     930          162 :   outer = bb->loop_father;
     931          162 :   entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
     932              : 
     933              :   /* Look upward in dominance tree.  */
     934          743 :   for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
     935          581 :        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
     936              :     {
     937          691 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
     938          691 :       if (!safe_is_a <gcond *> (*gsi))
     939          581 :         continue;
     940              : 
     941          508 :       gsi_prev (&gsi);
     942          508 :       if (gsi_end_p (gsi))
     943            8 :         continue;
     944              : 
     945          500 :       gimple *g = gsi_stmt (gsi);
     946              :       /* The guarding internal function call must have the same distribution
     947              :          alias id.  */
     948          500 :       if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
     949          500 :           && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
     950       486396 :         return g;
     951              :     }
     952              :   return NULL;
     953              : }
     954              : 
     955              : /* Set the uids of all the statements in basic blocks inside loop
     956              :    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
     957              :    call guarding the loop which has been if converted.  */
     958              : static void
     959         7768 : set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
     960              :                   function *fun)
     961              : {
     962         7768 :   tree arg = gimple_call_arg (loop_vectorized_call, 1);
     963         7768 :   basic_block *bbs;
     964         7768 :   unsigned int i;
     965         7768 :   class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
     966              : 
     967         7768 :   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
     968         7768 :   LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo)
     969         7768 :     = vec_init_loop_exit_info (scalar_loop);
     970         7768 :   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
     971              :                        == loop_vectorized_call);
     972              :   /* If we are going to vectorize outer loop, prevent vectorization
     973              :      of the inner loop in the scalar loop - either the scalar loop is
     974              :      thrown away, so it is a wasted work, or is used only for
     975              :      a few iterations.  */
     976         7768 :   if (scalar_loop->inner)
     977              :     {
     978          102 :       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
     979          102 :       if (g)
     980              :         {
     981          102 :           arg = gimple_call_arg (g, 0);
     982          102 :           get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
     983          102 :           fold_loop_internal_call (g, boolean_false_node);
     984              :         }
     985              :     }
     986         7768 :   bbs = get_loop_body (scalar_loop);
     987        42237 :   for (i = 0; i < scalar_loop->num_nodes; i++)
     988              :     {
     989        34469 :       basic_block bb = bbs[i];
     990        34469 :       gimple_stmt_iterator gsi;
     991        66243 :       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     992              :         {
     993        31774 :           gimple *phi = gsi_stmt (gsi);
     994        31774 :           gimple_set_uid (phi, 0);
     995              :         }
     996       174952 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     997              :         {
     998       106014 :           gimple *stmt = gsi_stmt (gsi);
     999       106014 :           gimple_set_uid (stmt, 0);
    1000              :         }
    1001              :     }
    1002         7768 :   free (bbs);
    1003         7768 : }
    1004              : 
    1005              : /* Generate vectorized code for LOOP and its epilogues.  */
    1006              : 
    1007              : static unsigned
    1008        61418 : vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1009              :                       loop_p loop, gimple *loop_vectorized_call,
    1010              :                       function *fun)
    1011              : {
    1012        61418 :   loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
    1013              : 
    1014        61418 :   if (loop_vectorized_call)
    1015         7768 :     set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
    1016              : 
    1017        61418 :   unsigned HOST_WIDE_INT bytes;
    1018        61418 :   if (dump_enabled_p ())
    1019              :     {
    1020        21894 :       if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
    1021        10947 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1022              :                          "%sloop vectorized using %s%wu byte vectors and"
    1023              :                          " unroll factor %u\n",
    1024        10947 :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1025              :                          ? "epilogue " : "",
    1026        10947 :                          LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    1027              :                          ? "masked " : "", bytes,
    1028              :                          (unsigned int) LOOP_VINFO_VECT_FACTOR
    1029        10947 :                                                  (loop_vinfo).to_constant ());
    1030              :       else
    1031              :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
    1032              :                          "%sloop vectorized using variable length vectors\n",
    1033              :                          LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    1034              :                          ? "epilogue " : "");
    1035              :     }
    1036              : 
    1037        61418 :   loop_p new_loop = vect_transform_loop (loop_vinfo,
    1038              :                                          loop_vectorized_call);
    1039              :   /* Now that the loop has been vectorized, allow it to be unrolled
    1040              :      etc.  */
    1041        61418 :   loop->force_vectorize = false;
    1042              : 
    1043        61418 :   if (loop->simduid)
    1044              :     {
    1045         1895 :       simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
    1046         1895 :       if (!simduid_to_vf_htab)
    1047         1535 :         simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
    1048         1895 :       simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
    1049         1895 :       simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
    1050         1895 :       *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
    1051         1895 :           = simduid_to_vf_data;
    1052              :     }
    1053              : 
    1054              :   /* We should not have to update virtual SSA form here but some
    1055              :      transforms involve creating new virtual definitions which makes
    1056              :      updating difficult.
    1057              :      We delay the actual update to the end of the pass but avoid
    1058              :      confusing ourselves by forcing need_ssa_update_p () to false.  */
    1059        61418 :   unsigned todo = 0;
    1060        61418 :   if (need_ssa_update_p (cfun))
    1061              :     {
    1062          119 :       gcc_assert (loop_vinfo->any_known_not_updated_vssa);
    1063          119 :       fun->gimple_df->ssa_renaming_needed = false;
    1064          119 :       todo |= TODO_update_ssa_only_virtuals;
    1065              :     }
    1066        61418 :   gcc_assert (!need_ssa_update_p (cfun));
    1067              : 
    1068              :   /* Epilogue of vectorized loop must be vectorized too.  */
    1069        61418 :   if (new_loop)
    1070         7056 :     todo |= vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);
    1071              : 
    1072        61418 :   return todo;
    1073              : }
    1074              : 
    1075              : /* Try to vectorize LOOP.  */
    1076              : 
    1077              : static unsigned
    1078       469419 : try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1079              :                       unsigned *num_vectorized_loops, loop_p loop,
    1080              :                       gimple *loop_vectorized_call,
    1081              :                       gimple *loop_dist_alias_call,
    1082              :                       function *fun)
    1083              : {
    1084       469419 :   unsigned ret = 0;
    1085       469419 :   vec_info_shared shared;
    1086       469419 :   auto_purge_vect_location sentinel;
    1087       469419 :   vect_location = find_loop_location (loop);
    1088              : 
    1089       469419 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    1090       469419 :       && dump_enabled_p ())
    1091        14954 :     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
    1092              :                  "\nAnalyzing loop at %s:%d\n",
    1093        14954 :                  LOCATION_FILE (vect_location.get_location_t ()),
    1094        29908 :                  LOCATION_LINE (vect_location.get_location_t ()));
    1095              : 
    1096              :   /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
    1097       469419 :   opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, loop_vectorized_call,
    1098              :                                                     &shared);
    1099       469419 :   loop->aux = loop_vinfo;
    1100              : 
    1101       469419 :   if (!loop_vinfo)
    1102       415053 :     if (dump_enabled_p ())
    1103         5764 :       if (opt_problem *problem = loop_vinfo.get_problem ())
    1104              :         {
    1105         5764 :           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1106              :                            "couldn't vectorize loop\n");
    1107         5764 :           problem->emit_and_clear ();
    1108              :         }
    1109              : 
    1110       469419 :   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    1111              :     {
    1112              :       /* Free existing information if loop is analyzed with some
    1113              :          assumptions.  */
    1114       415053 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1115         7872 :         vect_free_loop_info_assumptions (loop);
    1116              : 
    1117              :       /* If we applied if-conversion then try to vectorize the
    1118              :          BB of innermost loops.
    1119              :          ???  Ideally BB vectorization would learn to vectorize
    1120              :          control flow by applying if-conversion on-the-fly, the
    1121              :          following retains the if-converted loop body even when
    1122              :          only non-if-converted parts took part in BB vectorization.  */
    1123       415053 :       if (flag_tree_slp_vectorize != 0
    1124       414027 :           && loop_vectorized_call
    1125        21021 :           && ! loop->inner)
    1126              :         {
    1127        20204 :           basic_block bb = loop->header;
    1128        20204 :           bool require_loop_vectorize = false;
    1129        40408 :           for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
    1130       697238 :                !gsi_end_p (gsi); gsi_next (&gsi))
    1131              :             {
    1132       677855 :               gimple *stmt = gsi_stmt (gsi);
    1133       677855 :               gcall *call = dyn_cast <gcall *> (stmt);
    1134         1199 :               if (call && gimple_call_internal_p (call))
    1135              :                 {
    1136         1102 :                   internal_fn ifn = gimple_call_internal_fn (call);
    1137         1102 :                   if (ifn == IFN_MASK_LOAD
    1138         1102 :                       || ifn == IFN_MASK_STORE
    1139          684 :                       || ifn == IFN_MASK_CALL
    1140              :                       /* Don't keep the if-converted parts when the ifn with
    1141              :                          specific type is not supported by the backend.  */
    1142         1767 :                       || (direct_internal_fn_p (ifn)
    1143          384 :                           && !direct_internal_fn_supported_p
    1144          384 :                           (call, OPTIMIZE_FOR_SPEED)))
    1145              :                     {
    1146              :                       require_loop_vectorize = true;
    1147              :                       break;
    1148              :                     }
    1149              :                 }
    1150       677034 :               gimple_set_uid (stmt, -1);
    1151       677034 :               gimple_set_visited (stmt, false);
    1152              :             }
    1153        20204 :           if (!require_loop_vectorize)
    1154              :             {
    1155        19383 :               tree arg = gimple_call_arg (loop_vectorized_call, 1);
    1156        19383 :               class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
    1157        19383 :               if (vect_slp_if_converted_bb (bb, scalar_loop))
    1158              :                 {
    1159           75 :                   fold_loop_internal_call (loop_vectorized_call,
    1160              :                                            boolean_true_node);
    1161           75 :                   loop_vectorized_call = NULL;
    1162           75 :                   ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    1163              :                 }
    1164              :             }
    1165              :         }
    1166              :       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
    1167              :          loop, don't vectorize its inner loop; we'll attempt to
    1168              :          vectorize LOOP_VECTORIZED guarded inner loop of the scalar
    1169              :          loop version.  */
    1170        22047 :       if (loop_vectorized_call && loop->inner)
    1171          819 :         loop->inner->dont_vectorize = true;
    1172       415053 :       return ret;
    1173              :     }
    1174              : 
    1175        54366 :   if (!dbg_cnt (vect_loop))
    1176              :     {
    1177              :       /* Free existing information if loop is analyzed with some
    1178              :          assumptions.  */
    1179            4 :       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
    1180            0 :         vect_free_loop_info_assumptions (loop);
    1181            4 :       return ret;
    1182              :     }
    1183              : 
    1184        54362 :   (*num_vectorized_loops)++;
    1185              :   /* Transform LOOP and its epilogues.  */
    1186        54362 :   ret |= vect_transform_loops (simduid_to_vf_htab, loop,
    1187              :                                loop_vectorized_call, fun);
    1188              : 
    1189        54362 :   if (loop_vectorized_call)
    1190              :     {
    1191         7768 :       fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
    1192         7768 :       ret |= TODO_cleanup_cfg;
    1193              :     }
    1194        54362 :   if (loop_dist_alias_call)
    1195              :     {
    1196            8 :       tree value = gimple_call_arg (loop_dist_alias_call, 1);
    1197            8 :       fold_loop_internal_call (loop_dist_alias_call, value);
    1198            8 :       ret |= TODO_cleanup_cfg;
    1199              :     }
    1200              : 
    1201              :   return ret;
    1202       469419 : }
    1203              : 
    1204              : /* Try to vectorize LOOP.  */
    1205              : 
    1206              : static unsigned
    1207       500741 : try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
    1208              :                     unsigned *num_vectorized_loops, loop_p loop,
    1209              :                     function *fun)
    1210              : {
    1211       500741 :   if (!((flag_tree_loop_vectorize
    1212       496159 :          && optimize_loop_nest_for_speed_p (loop))
    1213        33125 :         || loop->force_vectorize))
    1214              :     return 0;
    1215              : 
    1216       469419 :   return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
    1217              :                                vect_loop_vectorized_call (loop),
    1218       469419 :                                vect_loop_dist_alias_call (loop, fun), fun);
    1219              : }
    1220              : 
    1221              : 
    1222              : /* Loop autovectorization.  */
    1223              : 
    1224              : namespace {
    1225              : 
    1226              : const pass_data pass_data_vectorize =
    1227              : {
    1228              :   GIMPLE_PASS, /* type */
    1229              :   "vect", /* name */
    1230              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1231              :   TV_TREE_VECTORIZATION, /* tv_id */
    1232              :   ( PROP_cfg | PROP_ssa ), /* properties_required */
    1233              :   0, /* properties_provided */
    1234              :   0, /* properties_destroyed */
    1235              :   0, /* todo_flags_start */
    1236              :   0, /* todo_flags_finish */
    1237              : };
    1238              : 
    1239              : class pass_vectorize : public gimple_opt_pass
    1240              : {
    1241              : public:
    1242       285722 :   pass_vectorize (gcc::context *ctxt)
    1243       571444 :     : gimple_opt_pass (pass_data_vectorize, ctxt)
    1244              :   {}
    1245              : 
    1246              :   /* opt_pass methods: */
    1247       241458 :   bool gate (function *fun) final override
    1248              :     {
    1249       241458 :       return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    1250              :     }
    1251              : 
    1252              :   unsigned int execute (function *) final override;
    1253              : 
    1254              : }; // class pass_vectorize
    1255              : 
    1256              : /* Function vectorize_loops.
    1257              : 
    1258              :    Entry point to loop vectorization phase.  */
    1259              : 
    1260              : unsigned
    1261       208529 : pass_vectorize::execute (function *fun)
    1262              : {
    1263       208529 :   unsigned int i;
    1264       208529 :   unsigned int num_vectorized_loops = 0;
    1265       208529 :   unsigned int vect_loops_num;
    1266       208529 :   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
    1267       208529 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1268       208529 :   bool any_ifcvt_loops = false;
    1269       208529 :   unsigned ret = 0;
    1270              : 
    1271       208529 :   vect_loops_num = number_of_loops (fun);
    1272              : 
    1273              :   /* Bail out if there are no loops.  */
    1274       208529 :   if (vect_loops_num <= 1)
    1275              :     return 0;
    1276              : 
    1277       208529 :   vect_slp_init ();
    1278              : 
    1279       208529 :   if (fun->has_simduid_loops)
    1280         5601 :     note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1281              : 
    1282              :   /*  ----------- Analyze loops. -----------  */
    1283       208529 :   enable_ranger (fun);
    1284              : 
    1285              :   /* If some loop was duplicated, it gets bigger number
    1286              :      than all previously defined loops.  This fact allows us to run
    1287              :      only over initial loops skipping newly generated ones.  */
    1288      1161078 :   for (auto loop : loops_list (fun, 0))
    1289       535491 :     if (loop->dont_vectorize)
    1290              :       {
    1291        35671 :         any_ifcvt_loops = true;
    1292              :         /* If-conversion sometimes versions both the outer loop
    1293              :            (for the case when outer loop vectorization might be
    1294              :            desirable) as well as the inner loop in the scalar version
    1295              :            of the loop.  So we have:
    1296              :             if (LOOP_VECTORIZED (1, 3))
    1297              :               {
    1298              :                 loop1
    1299              :                   loop2
    1300              :               }
    1301              :             else
    1302              :               loop3 (copy of loop1)
    1303              :                 if (LOOP_VECTORIZED (4, 5))
    1304              :                   loop4 (copy of loop2)
    1305              :                 else
    1306              :                   loop5 (copy of loop4)
    1307              :            If loops' iteration gives us loop3 first (which has
    1308              :            dont_vectorize set), make sure to process loop1 before loop4;
    1309              :            so that we can prevent vectorization of loop4 if loop1
    1310              :            is successfully vectorized.  */
    1311        35671 :         if (loop->inner)
    1312              :           {
    1313         2164 :             gimple *loop_vectorized_call
    1314         2164 :               = vect_loop_vectorized_call (loop);
    1315         2164 :             if (loop_vectorized_call
    1316         2164 :                 && vect_loop_vectorized_call (loop->inner))
    1317              :               {
    1318          921 :                 tree arg = gimple_call_arg (loop_vectorized_call, 0);
    1319          921 :                 class loop *vector_loop
    1320          921 :                   = get_loop (fun, tree_to_shwi (arg));
    1321          921 :                 if (vector_loop && vector_loop != loop)
    1322              :                   {
    1323              :                     /* Make sure we don't vectorize it twice.  */
    1324          921 :                     vector_loop->dont_vectorize = true;
    1325          921 :                     ret |= try_vectorize_loop (simduid_to_vf_htab,
    1326              :                                                &num_vectorized_loops,
    1327              :                                                vector_loop, fun);
    1328              :                   }
    1329              :               }
    1330              :           }
    1331              :       }
    1332              :     else
    1333       499820 :       ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
    1334       208529 :                                  loop, fun);
    1335              : 
    1336       208529 :   vect_location = dump_user_location_t ();
    1337              : 
    1338       208529 :   statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
    1339       208529 :   if (dump_enabled_p ()
    1340       208529 :       || (num_vectorized_loops > 0 && dump_enabled_p ()))
    1341        11599 :     dump_printf_loc (MSG_NOTE, vect_location,
    1342              :                      "vectorized %u loops in function.\n",
    1343              :                      num_vectorized_loops);
    1344              : 
    1345              :   /*  ----------- Finalize. -----------  */
    1346       208529 :   disable_ranger (fun);
    1347              : 
    1348       208529 :   if (any_ifcvt_loops)
    1349       322008 :     for (i = 1; i < number_of_loops (fun); i++)
    1350              :       {
    1351       139332 :         class loop *loop = get_loop (fun, i);
    1352       139332 :         if (loop && loop->dont_vectorize)
    1353              :           {
    1354        38224 :             gimple *g = vect_loop_vectorized_call (loop);
    1355        38224 :             if (g)
    1356              :               {
    1357        21247 :                 fold_loop_internal_call (g, boolean_false_node);
    1358        21247 :                 loop->dont_vectorize = false;
    1359        21247 :                 ret |= TODO_cleanup_cfg;
    1360        21247 :                 g = NULL;
    1361              :               }
    1362              :             else
    1363        16977 :               g = vect_loop_dist_alias_call (loop, fun);
    1364              : 
    1365        38224 :             if (g)
    1366              :               {
    1367           28 :                 fold_loop_internal_call (g, boolean_false_node);
    1368           28 :                 loop->dont_vectorize = false;
    1369           28 :                 ret |= TODO_cleanup_cfg;
    1370              :               }
    1371              :           }
    1372              :       }
    1373              : 
    1374              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1375       208529 :   if (fun->has_simduid_loops)
    1376              :     {
    1377         5601 :       adjust_simduid_builtins (simduid_to_vf_htab, fun);
    1378              :       /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
    1379         5601 :       scev_reset ();
    1380              :     }
    1381              :   /* Shrink any "omp array simd" temporary arrays to the
    1382              :      actual vectorization factors.  */
    1383       208529 :   if (simd_array_to_simduid_htab)
    1384         2204 :     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
    1385       208529 :   delete simduid_to_vf_htab;
    1386       208529 :   fun->has_simduid_loops = false;
    1387              : 
    1388       208529 :   if (num_vectorized_loops > 0)
    1389              :     {
    1390              :       /* We are collecting some corner cases where we need to update
    1391              :          virtual SSA form via the TODO but delete the queued update-SSA
    1392              :          state.  Force renaming if we think that might be necessary.  */
    1393        36820 :       if (ret & TODO_update_ssa_only_virtuals)
    1394           89 :         mark_virtual_operands_for_renaming (cfun);
    1395              :       /* If we vectorized any loop only virtual SSA form needs to be updated.
    1396              :          ???  Also while we try hard to update loop-closed SSA form we fail
    1397              :          to properly do this in some corner-cases (see PR56286).  */
    1398        36820 :       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
    1399        36820 :       ret |= TODO_cleanup_cfg;
    1400              :     }
    1401              : 
    1402      1873052 :   for (i = 1; i < number_of_loops (fun); i++)
    1403              :     {
    1404       727997 :       loop_vec_info loop_vinfo;
    1405       727997 :       bool has_mask_store;
    1406              : 
    1407       727997 :       class loop *loop = get_loop (fun, i);
    1408       727997 :       if (!loop || !loop->aux)
    1409       666575 :         continue;
    1410        61422 :       loop_vinfo = (loop_vec_info) loop->aux;
    1411        61422 :       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
    1412        61422 :       delete loop_vinfo;
    1413        61422 :       if (has_mask_store
    1414        61422 :           && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
    1415          499 :         optimize_mask_stores (loop);
    1416              : 
    1417        61422 :       auto_bitmap exit_bbs;
    1418              :       /* Perform local CSE, this esp. helps because we emit code for
    1419              :          predicates that need to be shared for optimal predicate usage.
    1420              :          However reassoc will re-order them and prevent CSE from working
    1421              :          as it should.  CSE only the loop body, not the entry.  */
    1422        61422 :       auto_vec<edge> exits = get_loop_exit_edges (loop);
    1423       247234 :       for (edge exit : exits)
    1424        62968 :         bitmap_set_bit (exit_bbs, exit->dest->index);
    1425              : 
    1426        61422 :       edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
    1427        61422 :       do_rpo_vn (fun, entry, exit_bbs);
    1428              : 
    1429        61422 :       loop->aux = NULL;
    1430        61422 :     }
    1431              : 
    1432       208529 :   vect_slp_fini ();
    1433              : 
    1434       208529 :   return ret;
    1435              : }
    1436              : 
    1437              : } // anon namespace
    1438              : 
    1439              : gimple_opt_pass *
    1440       285722 : make_pass_vectorize (gcc::context *ctxt)
    1441              : {
    1442       285722 :   return new pass_vectorize (ctxt);
    1443              : }
    1444              : 
    1445              : /* Entry point to the simduid cleanup pass.  */
    1446              : 
    1447              : namespace {
    1448              : 
    1449              : const pass_data pass_data_simduid_cleanup =
    1450              : {
    1451              :   GIMPLE_PASS, /* type */
    1452              :   "simduid", /* name */
    1453              :   OPTGROUP_NONE, /* optinfo_flags */
    1454              :   TV_NONE, /* tv_id */
    1455              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1456              :   0, /* properties_provided */
    1457              :   0, /* properties_destroyed */
    1458              :   0, /* todo_flags_start */
    1459              :   0, /* todo_flags_finish */
    1460              : };
    1461              : 
    1462              : class pass_simduid_cleanup : public gimple_opt_pass
    1463              : {
    1464              : public:
    1465       571444 :   pass_simduid_cleanup (gcc::context *ctxt)
    1466      1142888 :     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
    1467              :   {}
    1468              : 
    1469              :   /* opt_pass methods: */
    1470       285722 :   opt_pass * clone () final override
    1471              :   {
    1472       285722 :     return new pass_simduid_cleanup (m_ctxt);
    1473              :   }
    1474      2513634 :   bool gate (function *fun) final override { return fun->has_simduid_loops; }
    1475              :   unsigned int execute (function *) final override;
    1476              : 
    1477              : }; // class pass_simduid_cleanup
    1478              : 
    1479              : unsigned int
    1480         2208 : pass_simduid_cleanup::execute (function *fun)
    1481              : {
    1482         2208 :   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
    1483              : 
    1484         2208 :   note_simd_array_uses (&simd_array_to_simduid_htab, fun);
    1485              : 
    1486              :   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
    1487         2208 :   adjust_simduid_builtins (NULL, fun);
    1488              : 
    1489              :   /* Shrink any "omp array simd" temporary arrays to the
    1490              :      actual vectorization factors.  */
    1491         2208 :   if (simd_array_to_simduid_htab)
    1492            4 :     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
    1493         2208 :   fun->has_simduid_loops = false;
    1494         2208 :   return 0;
    1495              : }
    1496              : 
    1497              : }  // anon namespace
    1498              : 
    1499              : gimple_opt_pass *
    1500       285722 : make_pass_simduid_cleanup (gcc::context *ctxt)
    1501              : {
    1502       285722 :   return new pass_simduid_cleanup (ctxt);
    1503              : }
    1504              : 
    1505              : 
    1506              : /*  Entry point to basic block SLP phase.  */
    1507              : 
    1508              : namespace {
    1509              : 
    1510              : const pass_data pass_data_slp_vectorize =
    1511              : {
    1512              :   GIMPLE_PASS, /* type */
    1513              :   "slp", /* name */
    1514              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1515              :   TV_TREE_SLP_VECTORIZATION, /* tv_id */
    1516              :   ( PROP_ssa | PROP_cfg ), /* properties_required */
    1517              :   0, /* properties_provided */
    1518              :   0, /* properties_destroyed */
    1519              :   0, /* todo_flags_start */
    1520              :   TODO_update_ssa, /* todo_flags_finish */
    1521              : };
    1522              : 
    1523              : class pass_slp_vectorize : public gimple_opt_pass
    1524              : {
    1525              : public:
    1526       571444 :   pass_slp_vectorize (gcc::context *ctxt)
    1527      1142888 :     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
    1528              :   {}
    1529              : 
    1530              :   /* opt_pass methods: */
    1531       285722 :   opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
    1532      1041489 :   bool gate (function *) final override { return flag_tree_slp_vectorize != 0; }
    1533              :   unsigned int execute (function *) final override;
    1534              : 
    1535              : }; // class pass_slp_vectorize
    1536              : 
    1537              : unsigned int
    1538       909169 : pass_slp_vectorize::execute (function *fun)
    1539              : {
    1540       909169 :   auto_purge_vect_location sentinel;
    1541       909169 :   basic_block bb;
    1542              : 
    1543       909169 :   bool in_loop_pipeline = scev_initialized_p ();
    1544       909169 :   if (!in_loop_pipeline)
    1545              :     {
    1546       702269 :       loop_optimizer_init (LOOPS_NORMAL);
    1547       702269 :       scev_initialize ();
    1548              :     }
    1549              : 
    1550              :   /* Mark all stmts as not belonging to the current region and unvisited.  */
    1551     11588604 :   FOR_EACH_BB_FN (bb, fun)
    1552              :     {
    1553     15461299 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    1554      4781864 :            gsi_next (&gsi))
    1555              :         {
    1556      4781864 :           gphi *stmt = gsi.phi ();
    1557      4781864 :           gimple_set_uid (stmt, -1);
    1558      4781864 :           gimple_set_visited (stmt, false);
    1559              :         }
    1560    100041640 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    1561     78682770 :            gsi_next (&gsi))
    1562              :         {
    1563     78682770 :           gimple *stmt = gsi_stmt (gsi);
    1564     78682770 :           gimple_set_uid (stmt, -1);
    1565     78682770 :           gimple_set_visited (stmt, false);
    1566              :         }
    1567              :     }
    1568              : 
    1569       909169 :   vect_slp_init ();
    1570              : 
    1571       909169 :   vect_slp_function (fun);
    1572              : 
    1573       909169 :   vect_slp_fini ();
    1574              : 
    1575       909169 :   if (!in_loop_pipeline)
    1576              :     {
    1577       702269 :       scev_finalize ();
    1578       702269 :       loop_optimizer_finalize ();
    1579              :     }
    1580              : 
    1581      1818338 :   return 0;
    1582       909169 : }
    1583              : 
    1584              : } // anon namespace
    1585              : 
    1586              : gimple_opt_pass *
    1587       285722 : make_pass_slp_vectorize (gcc::context *ctxt)
    1588              : {
    1589       285722 :   return new pass_slp_vectorize (ctxt);
    1590              : }
    1591              : 
    1592              : 
    1593              : /* Increase alignment of global arrays to improve vectorization potential.
    1594              :    TODO:
    1595              :    - Consider also structs that have an array field.
    1596              :    - Use ipa analysis to prune arrays that can't be vectorized?
    1597              :      This should involve global alignment analysis and in the future also
    1598              :      array padding.  */
    1599              : 
    1600              : static unsigned get_vec_alignment_for_type (tree);
    1601              : static hash_map<tree, unsigned> *type_align_map;
    1602              : 
    1603              : /* Return alignment of array's vector type corresponding to scalar type.
    1604              :    0 if no vector type exists.  */
    1605              : static unsigned
    1606            0 : get_vec_alignment_for_array_type (tree type)
    1607              : {
    1608            0 :   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
    1609            0 :   poly_uint64 array_size, vector_size;
    1610              : 
    1611            0 :   tree scalar_type = strip_array_types (type);
    1612            0 :   tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
    1613            0 :   if (!vectype
    1614            0 :       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
    1615            0 :       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
    1616            0 :       || maybe_lt (array_size, vector_size))
    1617            0 :     return 0;
    1618              : 
    1619            0 :   return TYPE_ALIGN (vectype);
    1620              : }
    1621              : 
    1622              : /* Return alignment of field having maximum alignment of vector type
    1623              :    corresponding to it's scalar type. For now, we only consider fields whose
    1624              :    offset is a multiple of it's vector alignment.
    1625              :    0 if no suitable field is found.  */
    1626              : static unsigned
    1627            0 : get_vec_alignment_for_record_type (tree type)
    1628              : {
    1629            0 :   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
    1630              : 
    1631            0 :   unsigned max_align = 0, alignment;
    1632            0 :   HOST_WIDE_INT offset;
    1633            0 :   tree offset_tree;
    1634              : 
    1635            0 :   if (TYPE_PACKED (type))
    1636              :     return 0;
    1637              : 
    1638            0 :   unsigned *slot = type_align_map->get (type);
    1639            0 :   if (slot)
    1640            0 :     return *slot;
    1641              : 
    1642            0 :   for (tree field = first_field (type);
    1643            0 :        field != NULL_TREE;
    1644            0 :        field = DECL_CHAIN (field))
    1645              :     {
    1646              :       /* Skip if not FIELD_DECL or if alignment is set by user.  */
    1647            0 :       if (TREE_CODE (field) != FIELD_DECL
    1648            0 :           || DECL_USER_ALIGN (field)
    1649            0 :           || DECL_ARTIFICIAL (field))
    1650            0 :         continue;
    1651              : 
    1652              :       /* We don't need to process the type further if offset is variable,
    1653              :          since the offsets of remaining members will also be variable.  */
    1654            0 :       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
    1655            0 :           || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
    1656              :         break;
    1657              : 
    1658              :       /* Similarly stop processing the type if offset_tree
    1659              :          does not fit in unsigned HOST_WIDE_INT.  */
    1660            0 :       offset_tree = bit_position (field);
    1661            0 :       if (!tree_fits_uhwi_p (offset_tree))
    1662              :         break;
    1663              : 
    1664            0 :       offset = tree_to_uhwi (offset_tree);
    1665            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
    1666              : 
    1667              :       /* Get maximum alignment of vectorized field/array among those members
    1668              :          whose offset is multiple of the vector alignment.  */
    1669            0 :       if (alignment
    1670            0 :           && (offset % alignment == 0)
    1671            0 :           && (alignment > max_align))
    1672            0 :         max_align = alignment;
    1673              :     }
    1674              : 
    1675            0 :   type_align_map->put (type, max_align);
    1676            0 :   return max_align;
    1677              : }
    1678              : 
    1679              : /* Return alignment of vector type corresponding to decl's scalar type
    1680              :    or 0 if it doesn't exist or the vector alignment is lesser than
    1681              :    decl's alignment.  */
    1682              : static unsigned
    1683            0 : get_vec_alignment_for_type (tree type)
    1684              : {
    1685            0 :   if (type == NULL_TREE)
    1686              :     return 0;
    1687              : 
    1688            0 :   gcc_assert (TYPE_P (type));
    1689              : 
    1690            0 :   static unsigned alignment = 0;
    1691            0 :   switch (TREE_CODE (type))
    1692              :     {
    1693            0 :       case ARRAY_TYPE:
    1694            0 :         alignment = get_vec_alignment_for_array_type (type);
    1695            0 :         break;
    1696            0 :       case RECORD_TYPE:
    1697            0 :         alignment = get_vec_alignment_for_record_type (type);
    1698            0 :         break;
    1699            0 :       default:
    1700            0 :         alignment = 0;
    1701            0 :         break;
    1702              :     }
    1703              : 
    1704            0 :   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
    1705              : }
    1706              : 
    1707              : /* Entry point to increase_alignment pass.  */
    1708              : static unsigned int
    1709            0 : increase_alignment (void)
    1710              : {
    1711            0 :   varpool_node *vnode;
    1712              : 
    1713            0 :   vect_location = dump_user_location_t ();
    1714            0 :   type_align_map = new hash_map<tree, unsigned>;
    1715              : 
    1716              :   /* Increase the alignment of all global arrays for vectorization.  */
    1717            0 :   FOR_EACH_DEFINED_VARIABLE (vnode)
    1718              :     {
    1719            0 :       tree decl = vnode->decl;
    1720            0 :       unsigned int alignment;
    1721              : 
    1722            0 :       if ((decl_in_symtab_p (decl)
    1723            0 :           && !symtab_node::get (decl)->can_increase_alignment_p ())
    1724            0 :           || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
    1725            0 :         continue;
    1726              : 
    1727            0 :       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
    1728            0 :       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
    1729              :         {
    1730            0 :           vnode->increase_alignment (alignment);
    1731            0 :           if (dump_enabled_p ())
    1732            0 :             dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
    1733              :         }
    1734              :     }
    1735              : 
    1736            0 :   delete type_align_map;
    1737            0 :   return 0;
    1738              : }
    1739              : 
    1740              : 
    1741              : namespace {
    1742              : 
    1743              : const pass_data pass_data_ipa_increase_alignment =
    1744              : {
    1745              :   SIMPLE_IPA_PASS, /* type */
    1746              :   "increase_alignment", /* name */
    1747              :   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
    1748              :   TV_IPA_OPT, /* tv_id */
    1749              :   0, /* properties_required */
    1750              :   0, /* properties_provided */
    1751              :   0, /* properties_destroyed */
    1752              :   0, /* todo_flags_start */
    1753              :   0, /* todo_flags_finish */
    1754              : };
    1755              : 
    1756              : class pass_ipa_increase_alignment : public simple_ipa_opt_pass
    1757              : {
    1758              : public:
    1759       285722 :   pass_ipa_increase_alignment (gcc::context *ctxt)
    1760       571444 :     : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
    1761              :   {}
    1762              : 
    1763              :   /* opt_pass methods: */
    1764       229960 :   bool gate (function *) final override
    1765              :     {
    1766       229960 :       return flag_section_anchors && flag_tree_loop_vectorize;
    1767              :     }
    1768              : 
    1769            0 :   unsigned int execute (function *) final override
    1770              :   {
    1771            0 :     return increase_alignment ();
    1772              :   }
    1773              : 
    1774              : }; // class pass_ipa_increase_alignment
    1775              : 
    1776              : } // anon namespace
    1777              : 
    1778              : simple_ipa_opt_pass *
    1779       285722 : make_pass_ipa_increase_alignment (gcc::context *ctxt)
    1780              : {
    1781       285722 :   return new pass_ipa_increase_alignment (ctxt);
    1782              : }
    1783              : 
    1784              : /* If the condition represented by T is a comparison or the SSA name
    1785              :    result of a comparison, extract the comparison's operands.  Represent
    1786              :    T as NE_EXPR <T, 0> otherwise.  */
    1787              : 
    1788              : void
    1789        61908 : scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
    1790              : {
    1791        61908 :   if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
    1792              :     {
    1793            0 :       this->code = TREE_CODE (t);
    1794            0 :       this->op0 = TREE_OPERAND (t, 0);
    1795            0 :       this->op1 = TREE_OPERAND (t, 1);
    1796            0 :       this->inverted_p = false;
    1797            0 :       return;
    1798              :     }
    1799              : 
    1800        61908 :   if (TREE_CODE (t) == SSA_NAME)
    1801        24620 :     if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
    1802              :       {
    1803        24620 :         tree_code code = gimple_assign_rhs_code (stmt);
    1804        24620 :         if (TREE_CODE_CLASS (code) == tcc_comparison)
    1805              :           {
    1806        16736 :             this->code = code;
    1807        16736 :             this->op0 = gimple_assign_rhs1 (stmt);
    1808        16736 :             this->op1 = gimple_assign_rhs2 (stmt);
    1809        16736 :             this->inverted_p = false;
    1810        16736 :             return;
    1811              :           }
    1812         7884 :         else if (code == BIT_NOT_EXPR)
    1813              :           {
    1814         3502 :             tree n_op = gimple_assign_rhs1 (stmt);
    1815         3502 :             if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
    1816              :               {
    1817         3502 :                 code = gimple_assign_rhs_code (stmt);
    1818         3502 :                 if (TREE_CODE_CLASS (code) == tcc_comparison)
    1819              :                   {
    1820         3468 :                     this->code = code;
    1821         3468 :                     this->op0 = gimple_assign_rhs1 (stmt);
    1822         3468 :                     this->op1 = gimple_assign_rhs2 (stmt);
    1823         3468 :                     this->inverted_p = true;
    1824         3468 :                     return;
    1825              :                   }
    1826              :               }
    1827              :           }
    1828              :       }
    1829              : 
    1830        41704 :   this->code = NE_EXPR;
    1831        41704 :   this->op0 = t;
    1832        41704 :   this->op1 = build_zero_cst (TREE_TYPE (t));
    1833        41704 :   this->inverted_p = false;
    1834              : }
    1835              : 
    1836              : /* See the comment above the declaration for details.  */
    1837              : 
    1838              : unsigned int
    1839            0 : vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
    1840              :                              stmt_vec_info stmt_info, slp_tree,
    1841              :                              tree vectype, int misalign,
    1842              :                              vect_cost_model_location where)
    1843              : {
    1844            0 :   unsigned int cost
    1845            0 :     = builtin_vectorization_cost (kind, vectype, misalign) * count;
    1846            0 :   return record_stmt_cost (stmt_info, where, cost);
    1847              : }
    1848              : 
    1849              : /* See the comment above the declaration for details.  */
    1850              : 
    1851              : void
    1852      1690129 : vector_costs::finish_cost (const vector_costs *)
    1853              : {
    1854      1690129 :   gcc_assert (!m_finished);
    1855      1690129 :   m_finished = true;
    1856      1690129 : }
    1857              : 
    1858              : /* Record a base cost of COST units against WHERE.  If STMT_INFO is
    1859              :    nonnull, use it to adjust the cost based on execution frequency
    1860              :    (where appropriate).  */
    1861              : 
    1862              : unsigned int
    1863            0 : vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
    1864              :                                 vect_cost_model_location where,
    1865              :                                 unsigned int cost)
    1866              : {
    1867            0 :   cost = adjust_cost_for_freq (stmt_info, where, cost);
    1868            0 :   m_costs[where] += cost;
    1869            0 :   return cost;
    1870              : }
    1871              : 
    1872              : /* COST is the base cost we have calculated for an operation in location WHERE.
    1873              :    If STMT_INFO is nonnull, use it to adjust the cost based on execution
    1874              :    frequency (where appropriate).  Return the adjusted cost.  */
    1875              : 
    1876              : unsigned int
    1877      6656078 : vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
    1878              :                                     vect_cost_model_location where,
    1879              :                                     unsigned int cost)
    1880              : {
    1881              :   /* Statements in an inner loop relative to the loop being
    1882              :      vectorized are weighted more heavily.  The value here is
    1883              :      arbitrary and could potentially be improved with analysis.  */
    1884      6656078 :   if (where == vect_body
    1885      6656078 :       && stmt_info
    1886      6656078 :       && stmt_in_inner_loop_p (m_vinfo, stmt_info))
    1887              :     {
    1888         4578 :       loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
    1889         4578 :       cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
    1890              :     }
    1891      6656078 :   return cost;
    1892              : }
    1893              : 
    1894              : /* See the comment above the declaration for details.  */
    1895              : 
    1896              : bool
    1897            5 : vector_costs::better_main_loop_than_p (const vector_costs *other) const
    1898              : {
    1899            5 :   int diff = compare_inside_loop_cost (other);
    1900            5 :   if (diff != 0)
    1901            5 :     return diff < 0;
    1902              : 
    1903              :   /* If there's nothing to choose between the loop bodies, see whether
    1904              :      there's a difference in the prologue and epilogue costs.  */
    1905            0 :   diff = compare_outside_loop_cost (other);
    1906            0 :   if (diff != 0)
    1907            0 :     return diff < 0;
    1908              : 
    1909              :   return false;
    1910              : }
    1911              : 
    1912              : 
    1913              : /* See the comment above the declaration for details.  */
    1914              : 
    1915              : bool
    1916            0 : vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
    1917              :                                            loop_vec_info main_loop) const
    1918              : {
    1919            0 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    1920            0 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    1921              : 
    1922            0 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    1923            0 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    1924              : 
    1925            0 :   poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
    1926            0 :   unsigned HOST_WIDE_INT main_vf;
    1927            0 :   unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
    1928              :   /* If we can determine how many iterations are left for the epilogue
    1929              :      loop, that is if both the main loop's vectorization factor and number
    1930              :      of iterations are constant, then we use them to calculate the cost of
    1931              :      the epilogue loop together with a 'likely value' for the epilogues
    1932              :      vectorization factor.  Otherwise we use the main loop's vectorization
    1933              :      factor and the maximum poly value for the epilogue's.  If the target
    1934              :      has not provided with a sensible upper bound poly vectorization
    1935              :      factors are likely to be favored over constant ones.  */
    1936            0 :   if (main_poly_vf.is_constant (&main_vf)
    1937            0 :       && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    1938              :     {
    1939            0 :       unsigned HOST_WIDE_INT niters
    1940            0 :         = LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
    1941            0 :       HOST_WIDE_INT other_likely_vf
    1942            0 :         = estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
    1943            0 :       HOST_WIDE_INT this_likely_vf
    1944            0 :         = estimated_poly_value (this_vf, POLY_VALUE_LIKELY);
    1945              : 
    1946              :       /* If the epilogue is using partial vectors we account for the
    1947              :          partial iteration here too.  */
    1948            0 :       other_factor = niters / other_likely_vf;
    1949            0 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
    1950            0 :           && niters % other_likely_vf != 0)
    1951            0 :         other_factor++;
    1952              : 
    1953            0 :       this_factor = niters / this_likely_vf;
    1954            0 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
    1955            0 :           && niters % this_likely_vf != 0)
    1956            0 :         this_factor++;
    1957              :     }
    1958              :   else
    1959              :     {
    1960            0 :       unsigned HOST_WIDE_INT main_vf_max
    1961            0 :         = estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
    1962            0 :       unsigned HOST_WIDE_INT other_vf_max
    1963            0 :         = estimated_poly_value (other_vf, POLY_VALUE_MAX);
    1964            0 :       unsigned HOST_WIDE_INT this_vf_max
    1965            0 :         = estimated_poly_value (this_vf, POLY_VALUE_MAX);
    1966              : 
    1967            0 :       other_factor = CEIL (main_vf_max, other_vf_max);
    1968            0 :       this_factor = CEIL (main_vf_max, this_vf_max);
    1969              : 
    1970              :       /* If the loop is not using partial vectors then it will iterate one
    1971              :          time less than one that does.  It is safe to subtract one here,
    1972              :          because the main loop's vf is always at least 2x bigger than that
    1973              :          of an epilogue.  */
    1974            0 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
    1975            0 :         other_factor -= 1;
    1976            0 :       if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
    1977            0 :         this_factor -= 1;
    1978              :     }
    1979              : 
    1980              :   /* Compute the costs by multiplying the inside costs with the factor and
    1981              :      add the outside costs for a more complete picture.  The factor is the
    1982              :      amount of times we are expecting to iterate this epilogue.  */
    1983            0 :   other_cost = other->body_cost () * other_factor;
    1984            0 :   this_cost = this->body_cost () * this_factor;
    1985            0 :   other_cost += other->outside_cost ();
    1986            0 :   this_cost += this->outside_cost ();
    1987            0 :   return this_cost < other_cost;
    1988              : }
    1989              : 
    1990              : /* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
    1991              :    determine the return value of better_main_loop_than_p by comparing the
    1992              :    inside (loop body) costs of THIS and OTHER.  Return:
    1993              : 
    1994              :    * -1 if better_main_loop_than_p should return true.
    1995              :    * 1 if better_main_loop_than_p should return false.
    1996              :    * 0 if we can't decide.  */
    1997              : 
    1998              : int
    1999            5 : vector_costs::compare_inside_loop_cost (const vector_costs *other) const
    2000              : {
    2001            5 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
    2002            5 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
    2003              : 
    2004            5 :   struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
    2005            5 :   gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);
    2006              : 
    2007            5 :   poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
    2008            5 :   poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
    2009              : 
    2010              :   /* Limit the VFs to what is likely to be the maximum number of iterations,
    2011              :      to handle cases in which at least one loop_vinfo is fully-masked.  */
    2012            5 :   HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
    2013            5 :   if (estimated_max_niter != -1)
    2014              :     {
    2015            5 :       if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
    2016              :           >= estimated_max_niter)
    2017              :         this_vf = estimated_max_niter;
    2018            5 :       if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
    2019              :           >= estimated_max_niter)
    2020              :         other_vf = estimated_max_niter;
    2021              :     }
    2022              : 
    2023              :   /* Check whether the (fractional) cost per scalar iteration is lower or
    2024              :      higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
    2025            5 :   poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
    2026            5 :   poly_int64 rel_other
    2027            5 :     = other_loop_vinfo->vector_costs->body_cost () * this_vf;
    2028              : 
    2029            5 :   HOST_WIDE_INT est_rel_this_min
    2030            5 :     = estimated_poly_value (rel_this, POLY_VALUE_MIN);
    2031            5 :   HOST_WIDE_INT est_rel_this_max
    2032            5 :     = estimated_poly_value (rel_this, POLY_VALUE_MAX);
    2033              : 
    2034            5 :   HOST_WIDE_INT est_rel_other_min
    2035            5 :     = estimated_poly_value (rel_other, POLY_VALUE_MIN);
    2036            5 :   HOST_WIDE_INT est_rel_other_max
    2037            5 :     = estimated_poly_value (rel_other, POLY_VALUE_MAX);
    2038              : 
    2039              :   /* Check first if we can make out an unambigous total order from the minimum
    2040              :      and maximum estimates.  */
    2041            5 :   if (est_rel_this_min < est_rel_other_min
    2042              :       && est_rel_this_max < est_rel_other_max)
    2043              :     return -1;
    2044              : 
    2045            4 :   if (est_rel_other_min < est_rel_this_min
    2046              :       && est_rel_other_max < est_rel_this_max)
    2047            4 :     return 1;
    2048              : 
    2049              :   /* When other_loop_vinfo uses a variable vectorization factor,
    2050              :      we know that it has a lower cost for at least one runtime VF.
    2051              :      However, we don't know how likely that VF is.
    2052              : 
    2053              :      One option would be to compare the costs for the estimated VFs.
    2054              :      The problem is that that can put too much pressure on the cost
    2055              :      model.  E.g. if the estimated VF is also the lowest possible VF,
    2056              :      and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
    2057              :      for the estimated VF, we'd then choose this_loop_vinfo even
    2058              :      though (a) this_loop_vinfo might not actually be better than
    2059              :      other_loop_vinfo for that VF and (b) it would be significantly
    2060              :      worse at larger VFs.
    2061              : 
    2062              :      Here we go for a hacky compromise: pick this_loop_vinfo if it is
    2063              :      no more expensive than other_loop_vinfo even after doubling the
    2064              :      estimated other_loop_vinfo VF.  For all but trivial loops, this
    2065              :      ensures that we only pick this_loop_vinfo if it is significantly
    2066              :      better than other_loop_vinfo at the estimated VF.  */
    2067              :   if (est_rel_other_min != est_rel_this_min
    2068              :       || est_rel_other_max != est_rel_this_max)
    2069              :     {
    2070              :       HOST_WIDE_INT est_rel_this_likely
    2071              :         = estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
    2072              :       HOST_WIDE_INT est_rel_other_likely
    2073              :         = estimated_poly_value (rel_other, POLY_VALUE_LIKELY);
    2074              : 
    2075              :       return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    2076              :     }
    2077              : 
    2078              :   return 0;
    2079              : }
    2080              : 
    2081              : /* A <=>-style subroutine of better_main_loop_than_p, used when there is
    2082              :    nothing to choose between the inside (loop body) costs of THIS and OTHER.
    2083              :    Check whether we can determine the return value of better_main_loop_than_p
    2084              :    by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
    2085              :    Return:
    2086              : 
    2087              :    * -1 if better_main_loop_than_p should return true.
    2088              :    * 1 if better_main_loop_than_p should return false.
    2089              :    * 0 if we can't decide.  */
    2090              : 
    2091              : int
    2092            0 : vector_costs::compare_outside_loop_cost (const vector_costs *other) const
    2093              : {
    2094            0 :   auto this_outside_cost = this->outside_cost ();
    2095            0 :   auto other_outside_cost = other->outside_cost ();
    2096            0 :   if (this_outside_cost != other_outside_cost)
    2097            0 :     return this_outside_cost < other_outside_cost ? -1 : 1;
    2098              : 
    2099              :   return 0;
    2100              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.