LCOV - code coverage report
Current view: top level - gcc - tree-vect-loop-manip.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 81.6 % 1944 1587
Test Date: 2026-02-28 14:20:25 Functions: 85.1 % 47 40
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer Specific Loop Manipulations
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              :    and Ira Rosen <irar@il.ibm.com>
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "cfghooks.h"
      29              : #include "tree-pass.h"
      30              : #include "ssa.h"
      31              : #include "fold-const.h"
      32              : #include "cfganal.h"
      33              : #include "gimplify.h"
      34              : #include "gimple-iterator.h"
      35              : #include "gimplify-me.h"
      36              : #include "tree-cfg.h"
      37              : #include "tree-ssa-loop-manip.h"
      38              : #include "tree-into-ssa.h"
      39              : #include "tree-ssa.h"
      40              : #include "cfgloop.h"
      41              : #include "tree-scalar-evolution.h"
      42              : #include "tree-vectorizer.h"
      43              : #include "tree-ssa-loop-ivopts.h"
      44              : #include "gimple-fold.h"
      45              : #include "tree-ssa-loop-niter.h"
      46              : #include "internal-fn.h"
      47              : #include "stor-layout.h"
      48              : #include "optabs-query.h"
      49              : #include "vec-perm-indices.h"
      50              : #include "insn-config.h"
      51              : #include "rtl.h"
      52              : #include "recog.h"
      53              : #include "langhooks.h"
      54              : #include "tree-vector-builder.h"
      55              : #include "optabs-tree.h"
      56              : #include "hierarchical_discriminator.h"
      57              : 
      58              : 
      59              : /*************************************************************************
      60              :   Simple Loop Peeling Utilities
      61              : 
      62              :   Utilities to support loop peeling for vectorization purposes.
      63              :  *************************************************************************/
      64              : 
      65              : 
      66              : /* Renames the use *OP_P.  */
      67              : 
      68              : static void
      69       916713 : rename_use_op (use_operand_p op_p)
      70              : {
      71       916713 :   tree new_name;
      72              : 
      73       916713 :   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
      74              :     return;
      75              : 
      76       913196 :   new_name = get_current_def (USE_FROM_PTR (op_p));
      77              : 
      78              :   /* Something defined outside of the loop.  */
      79       913196 :   if (!new_name)
      80              :     return;
      81              : 
      82              :   /* An ordinary ssa name defined in the loop.  */
      83              : 
      84       787216 :   SET_USE (op_p, new_name);
      85              : }
      86              : 
      87              : 
      88              : /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
      89              :    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
      90              :    true.  */
      91              : 
      92              : static void
      93       112490 : rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
      94              : {
      95       112490 :   gimple *stmt;
      96       112490 :   use_operand_p use_p;
      97       112490 :   ssa_op_iter iter;
      98       112490 :   edge e;
      99       112490 :   edge_iterator ei;
     100       112490 :   class loop *loop = bb->loop_father;
     101       112490 :   class loop *outer_loop = NULL;
     102              : 
     103       112490 :   if (rename_from_outer_loop)
     104              :     {
     105          835 :       gcc_assert (loop);
     106          835 :       outer_loop = loop_outer (loop);
     107              :     }
     108              : 
     109       746813 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
     110       521833 :        gsi_next (&gsi))
     111              :     {
     112       521833 :       stmt = gsi_stmt (gsi);
     113      1285126 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
     114       763293 :         rename_use_op (use_p);
     115              :     }
     116              : 
     117       229991 :   FOR_EACH_EDGE (e, ei, bb->preds)
     118              :     {
     119       117501 :       if (!flow_bb_inside_loop_p (loop, e->src))
     120              :         {
     121        35053 :           if (!rename_from_outer_loop)
     122        34791 :             continue;
     123          262 :           if (e->src != outer_loop->header)
     124              :             {
     125          153 :               if (outer_loop->inner->next)
     126              :                 {
     127              :                   /* If outer_loop has 2 inner loops, allow there to
     128              :                      be an extra basic block which decides which of the
     129              :                      two loops to use using LOOP_VECTORIZED.  */
     130          150 :                   if (!single_pred_p (e->src)
     131           26 :                       || single_pred (e->src) != outer_loop->header)
     132          124 :                     continue;
     133              :                 }
     134              :             }
     135              :         }
     136       187374 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
     137       104788 :            gsi_next (&gsi))
     138       104788 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
     139              :     }
     140       112490 : }
     141              : 
     142              : 
     143              : struct adjust_info
     144              : {
     145              :   tree from, to;
     146              :   basic_block bb;
     147              : };
     148              : 
     149              : /* A stack of values to be adjusted in debug stmts.  We have to
     150              :    process them LIFO, so that the closest substitution applies.  If we
     151              :    processed them FIFO, without the stack, we might substitute uses
     152              :    with a PHI DEF that would soon become non-dominant, and when we got
     153              :    to the suitable one, it wouldn't have anything to substitute any
     154              :    more.  */
     155              : static vec<adjust_info, va_heap> adjust_vec;
     156              : 
     157              : /* Adjust any debug stmts that referenced AI->from values to use the
     158              :    loop-closed AI->to, if the references are dominated by AI->bb and
     159              :    not by the definition of AI->from.  */
     160              : 
     161              : static void
     162        79252 : adjust_debug_stmts_now (adjust_info *ai)
     163              : {
     164        79252 :   basic_block bbphi = ai->bb;
     165        79252 :   tree orig_def = ai->from;
     166        79252 :   tree new_def = ai->to;
     167        79252 :   imm_use_iterator imm_iter;
     168        79252 :   gimple *stmt;
     169        79252 :   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
     170              : 
     171        79252 :   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
     172              : 
     173              :   /* Adjust any debug stmts that held onto non-loop-closed
     174              :      references.  */
     175       395581 :   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
     176              :     {
     177       237077 :       use_operand_p use_p;
     178       237077 :       basic_block bbuse;
     179              : 
     180       237077 :       if (!is_gimple_debug (stmt))
     181       179106 :         continue;
     182              : 
     183        57971 :       gcc_assert (gimple_debug_bind_p (stmt));
     184              : 
     185        57971 :       bbuse = gimple_bb (stmt);
     186              : 
     187        57971 :       if ((bbuse == bbphi
     188        57971 :            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
     189        59987 :           && !(bbuse == bbdef
     190         1008 :                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
     191              :         {
     192            0 :           if (new_def)
     193            0 :             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
     194            0 :               SET_USE (use_p, new_def);
     195              :           else
     196              :             {
     197            0 :               gimple_debug_bind_reset_value (stmt);
     198            0 :               update_stmt (stmt);
     199              :             }
     200              :         }
     201        79252 :     }
     202        79252 : }
     203              : 
     204              : /* Adjust debug stmts as scheduled before.  */
     205              : 
     206              : static void
     207        33780 : adjust_vec_debug_stmts (void)
     208              : {
     209        33780 :   if (!MAY_HAVE_DEBUG_BIND_STMTS)
     210              :     return;
     211              : 
     212        12468 :   gcc_assert (adjust_vec.exists ());
     213              : 
     214        91607 :   while (!adjust_vec.is_empty ())
     215              :     {
     216        79139 :       adjust_debug_stmts_now (&adjust_vec.last ());
     217        79139 :       adjust_vec.pop ();
     218              :     }
     219              : }
     220              : 
     221              : /* Adjust any debug stmts that referenced FROM values to use the
     222              :    loop-closed TO, if the references are dominated by BB and not by
     223              :    the definition of FROM.  If adjust_vec is non-NULL, adjustments
     224              :    will be postponed until adjust_vec_debug_stmts is called.  */
     225              : 
     226              : static void
     227       107246 : adjust_debug_stmts (tree from, tree to, basic_block bb)
     228              : {
     229       107246 :   adjust_info ai;
     230              : 
     231       107246 :   if (MAY_HAVE_DEBUG_BIND_STMTS
     232       107246 :       && TREE_CODE (from) == SSA_NAME
     233        97846 :       && ! SSA_NAME_IS_DEFAULT_DEF (from)
     234       203748 :       && ! virtual_operand_p (from))
     235              :     {
     236        79252 :       ai.from = from;
     237        79252 :       ai.to = to;
     238        79252 :       ai.bb = bb;
     239              : 
     240        79252 :       if (adjust_vec.exists ())
     241        79139 :         adjust_vec.safe_push (ai);
     242              :       else
     243          113 :         adjust_debug_stmts_now (&ai);
     244              :     }
     245       107246 : }
     246              : 
     247              : /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
     248              :    to adjust any debug stmts that referenced the old phi arg,
     249              :    presumably non-loop-closed references left over from other
     250              :    transformations.  */
     251              : 
     252              : static void
     253       218692 : adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
     254              : {
     255       218692 :   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
     256              : 
     257       218692 :   gcc_assert (TREE_CODE (orig_def) != SSA_NAME
     258              :               || orig_def != new_def);
     259              : 
     260       218692 :   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
     261              : 
     262       218692 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
     263        84046 :     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
     264              :                         gimple_bb (update_phi));
     265       218692 : }
     266              : 
     267              : /* Define one loop rgroup control CTRL from loop LOOP.  INIT_CTRL is the value
     268              :    that the control should have during the first iteration and NEXT_CTRL is the
     269              :    value that it should have on subsequent iterations.  */
     270              : 
     271              : static void
     272           86 : vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
     273              :                        tree next_ctrl)
     274              : {
     275           86 :   gphi *phi = create_phi_node (ctrl, loop->header);
     276           86 :   add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
     277           86 :   add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
     278           86 : }
     279              : 
     280              : /* Add SEQ to the end of LOOP's preheader block.  */
     281              : 
     282              : static void
     283           18 : add_preheader_seq (class loop *loop, gimple_seq seq)
     284              : {
     285           18 :   if (seq)
     286              :     {
     287           15 :       edge pe = loop_preheader_edge (loop);
     288           15 :       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     289           15 :       gcc_assert (!new_bb);
     290              :     }
     291           18 : }
     292              : 
     293              : /* Add SEQ to the beginning of LOOP's header block.  */
     294              : 
     295              : static void
     296            0 : add_header_seq (class loop *loop, gimple_seq seq)
     297              : {
     298            0 :   if (seq)
     299              :     {
     300            0 :       gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
     301            0 :       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
     302              :     }
     303            0 : }
     304              : 
     305              : /* Return true if the target can interleave elements of two vectors.
     306              :    OFFSET is 0 if the first half of the vectors should be interleaved
     307              :    or 1 if the second half should.  When returning true, store the
     308              :    associated permutation in INDICES.  */
     309              : 
     310              : static bool
     311            0 : interleave_supported_p (vec_perm_indices *indices, tree vectype,
     312              :                         unsigned int offset)
     313              : {
     314            0 :   poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
     315            0 :   poly_uint64 base = exact_div (nelts, 2) * offset;
     316            0 :   vec_perm_builder sel (nelts, 2, 3);
     317            0 :   for (unsigned int i = 0; i < 3; ++i)
     318              :     {
     319            0 :       sel.quick_push (base + i);
     320            0 :       sel.quick_push (base + i + nelts);
     321              :     }
     322            0 :   indices->new_vector (sel, 2, nelts);
     323            0 :   return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
     324            0 :                                *indices);
     325            0 : }
     326              : 
     327              : /* Try to use permutes to define the masks in DEST_RGM using the masks
     328              :    in SRC_RGM, given that the former has twice as many masks as the
     329              :    latter.  Return true on success, adding any new statements to SEQ.  */
     330              : 
     331              : static bool
     332            0 : vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
     333              :                                rgroup_controls *src_rgm)
     334              : {
     335            0 :   tree src_masktype = src_rgm->type;
     336            0 :   tree dest_masktype = dest_rgm->type;
     337            0 :   machine_mode src_mode = TYPE_MODE (src_masktype);
     338            0 :   insn_code icode1, icode2;
     339            0 :   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
     340            0 :       && (icode1 = optab_handler (vec_unpacku_hi_optab,
     341              :                                   src_mode)) != CODE_FOR_nothing
     342            0 :       && (icode2 = optab_handler (vec_unpacku_lo_optab,
     343              :                                   src_mode)) != CODE_FOR_nothing)
     344              :     {
     345              :       /* Unpacking the source masks gives at least as many mask bits as
     346              :          we need.  We can then VIEW_CONVERT any excess bits away.  */
     347            0 :       machine_mode dest_mode = insn_data[icode1].operand[0].mode;
     348            0 :       gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
     349            0 :       tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
     350            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     351              :         {
     352            0 :           tree src = src_rgm->controls[i / 2];
     353            0 :           tree dest = dest_rgm->controls[i];
     354            0 :           tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
     355            0 :                             ? VEC_UNPACK_HI_EXPR
     356              :                             : VEC_UNPACK_LO_EXPR);
     357            0 :           gassign *stmt;
     358            0 :           if (dest_masktype == unpack_masktype)
     359            0 :             stmt = gimple_build_assign (dest, code, src);
     360              :           else
     361              :             {
     362            0 :               tree temp = make_ssa_name (unpack_masktype);
     363            0 :               stmt = gimple_build_assign (temp, code, src);
     364            0 :               gimple_seq_add_stmt (seq, stmt);
     365            0 :               stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
     366              :                                           build1 (VIEW_CONVERT_EXPR,
     367              :                                                   dest_masktype, temp));
     368              :             }
     369            0 :           gimple_seq_add_stmt (seq, stmt);
     370              :         }
     371              :       return true;
     372              :     }
     373            0 :   vec_perm_indices indices[2];
     374            0 :   if (dest_masktype == src_masktype
     375            0 :       && interleave_supported_p (&indices[0], src_masktype, 0)
     376            0 :       && interleave_supported_p (&indices[1], src_masktype, 1))
     377              :     {
     378              :       /* The destination requires twice as many mask bits as the source, so
     379              :          we can use interleaving permutes to double up the number of bits.  */
     380              :       tree masks[2];
     381            0 :       for (unsigned int i = 0; i < 2; ++i)
     382            0 :         masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
     383            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     384              :         {
     385            0 :           tree src = src_rgm->controls[i / 2];
     386            0 :           tree dest = dest_rgm->controls[i];
     387            0 :           gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
     388            0 :                                               src, src, masks[i & 1]);
     389            0 :           gimple_seq_add_stmt (seq, stmt);
     390              :         }
     391            0 :       return true;
     392              :     }
     393              :   return false;
     394            0 : }
     395              : 
     396              : /* Populate DEST_RGM->controls, given that they should add up to STEP.
     397              : 
     398              :      STEP = MIN_EXPR <ivtmp_34, VF>;
     399              : 
     400              :      First length (MIN (X, VF/N)):
     401              :        loop_len_15 = MIN_EXPR <STEP, VF/N>;
     402              : 
     403              :      Second length:
     404              :        tmp = STEP - loop_len_15;
     405              :        loop_len_16 = MIN (tmp, VF/N);
     406              : 
     407              :      Third length:
     408              :        tmp2 = tmp - loop_len_16;
     409              :        loop_len_17 = MIN (tmp2, VF/N);
     410              : 
     411              :      Last length:
     412              :        loop_len_18 = tmp2 - loop_len_17;
     413              : */
     414              : 
     415              : static void
     416            0 : vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
     417              :                                rgroup_controls *dest_rgm, tree step)
     418              : {
     419            0 :   tree ctrl_type = dest_rgm->type;
     420            0 :   poly_uint64 nitems_per_ctrl
     421            0 :     = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
     422            0 :   tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
     423              : 
     424            0 :   for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     425              :     {
     426            0 :       tree ctrl = dest_rgm->controls[i];
     427            0 :       if (i == 0)
     428              :         {
     429              :           /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
     430            0 :           gassign *assign
     431            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     432            0 :           gimple_seq_add_stmt (seq, assign);
     433              :         }
     434            0 :       else if (i == dest_rgm->controls.length () - 1)
     435              :         {
     436              :           /* Last iteration: Remain capped to the range [0, VF/N].  */
     437            0 :           gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
     438            0 :                                                  dest_rgm->controls[i - 1]);
     439            0 :           gimple_seq_add_stmt (seq, assign);
     440              :         }
     441              :       else
     442              :         {
     443              :           /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N].  */
     444            0 :           step = gimple_build (seq, MINUS_EXPR, iv_type, step,
     445            0 :                                dest_rgm->controls[i - 1]);
     446            0 :           gassign *assign
     447            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     448            0 :           gimple_seq_add_stmt (seq, assign);
     449              :         }
     450              :     }
     451            0 : }
     452              : 
      453              : /* Stores to BSI the standard position for an induction variable increment
      454              :    belonging to LOOP_EXIT (just before the exit condition of the given exit).
      455              :    INSERT_AFTER is set to true if the increment should be inserted after
      456              :    *BSI.  */
     457              : 
     458              : void
     459        61880 : vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi,
     460              :                             bool *insert_after)
     461              : {
     462        61880 :   basic_block bb = loop_exit->src;
     463        61880 :   *bsi = gsi_last_bb (bb);
     464        61880 :   *insert_after = false;
     465        61880 : }
     466              : 
     467              : /* Helper for vect_set_loop_condition_partial_vectors.  Generate definitions
     468              :    for all the rgroup controls in RGC and return a control that is nonzero
     469              :    when the loop needs to iterate.  Add any new preheader statements to
     470              :    PREHEADER_SEQ.  Use LOOP_COND_GSI to insert code before the exit gcond.
     471              : 
     472              :    RGC belongs to loop LOOP.  The loop originally iterated NITERS
     473              :    times and has been vectorized according to LOOP_VINFO.
     474              : 
     475              :    If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
     476              :    starts with NITERS_SKIP dummy iterations of the scalar loop before
     477              :    the real work starts.  The mask elements for these dummy iterations
     478              :    must be 0, to ensure that the extra iterations do not have an effect.
     479              : 
     480              :    It is known that:
     481              : 
     482              :      NITERS * RGC->max_nscalars_per_iter * RGC->factor
     483              : 
     484              :    does not overflow.  However, MIGHT_WRAP_P says whether an induction
     485              :    variable that starts at 0 and has step:
     486              : 
     487              :      VF * RGC->max_nscalars_per_iter * RGC->factor
     488              : 
     489              :    might overflow before hitting a value above:
     490              : 
     491              :      (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
     492              : 
     493              :    This means that we cannot guarantee that such an induction variable
     494              :    would ever hit a value that produces a set of all-false masks or zero
     495              :    lengths for RGC.
     496              : 
     497              :    Note: the cost of the code generated by this function is modeled
     498              :    by vect_estimate_min_profitable_iters, so changes here may need
     499              :    corresponding changes there.  */
     500              : 
     501              : static tree
     502            0 : vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
     503              :                                  gimple_seq *preheader_seq,
     504              :                                  gimple_seq *header_seq,
     505              :                                  gimple_stmt_iterator loop_cond_gsi,
     506              :                                  rgroup_controls *rgc, tree niters,
     507              :                                  tree niters_skip, bool might_wrap_p,
     508              :                                  tree *iv_step, tree *compare_step)
     509              : {
     510            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
     511            0 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     512            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
     513              : 
     514            0 :   tree ctrl_type = rgc->type;
     515            0 :   unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
     516            0 :   poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
     517            0 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     518            0 :   tree length_limit = NULL_TREE;
     519              :   /* For length, we need length_limit to ensure length in range.  */
     520            0 :   if (!use_masks_p)
     521            0 :     length_limit = build_int_cst (compare_type, nitems_per_ctrl);
     522              : 
     523              :   /* Calculate the maximum number of item values that the rgroup
     524              :      handles in total, the number that it handles for each iteration
     525              :      of the vector loop, and the number that it should skip during the
     526              :      first iteration of the vector loop.  */
     527            0 :   tree nitems_total = niters;
     528            0 :   tree nitems_step = build_int_cst (iv_type, vf);
     529            0 :   tree nitems_skip = niters_skip;
     530            0 :   if (nitems_per_iter != 1)
     531              :     {
     532              :       /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
     533              :          these multiplications don't overflow.  */
     534            0 :       tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
     535            0 :       tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
     536            0 :       nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     537              :                                    nitems_total, compare_factor);
     538            0 :       nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
     539              :                                   nitems_step, iv_factor);
     540            0 :       if (nitems_skip)
     541            0 :         nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     542              :                                     nitems_skip, compare_factor);
     543              :     }
     544              : 
     545              :   /* Create an induction variable that counts the number of items
     546              :      processed.  */
     547            0 :   tree index_before_incr, index_after_incr;
     548            0 :   gimple_stmt_iterator incr_gsi;
     549            0 :   bool insert_after;
     550            0 :   edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
     551            0 :   vect_iv_increment_position (exit_e, &incr_gsi, &insert_after);
     552            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
     553              :     {
     554              :       /* Create an IV that counts down from niters_total and whose step
     555              :          is the (variable) amount processed in the current iteration:
     556              :            ...
     557              :            _10 = (unsigned long) count_12(D);
     558              :            ...
     559              :            # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
     560              :            _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
     561              :            ...
     562              :            vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
     563              :            ...
     564              :            ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
     565              :            ...
     566              :            if (ivtmp_9 > POLY_INT_CST [4, 4])
     567              :              goto <bb 4>; [83.33%]
     568              :            else
     569              :              goto <bb 5>; [16.67%]
     570              :       */
     571            0 :       nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
     572            0 :       tree step = rgc->controls.length () == 1 ? rgc->controls[0]
     573            0 :                                                : make_ssa_name (iv_type);
     574              :       /* Create decrement IV.  */
     575            0 :       if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
     576              :         {
     577            0 :           create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
     578              :                      insert_after, &index_before_incr, &index_after_incr);
     579            0 :           tree vectype = build_zero_cst (rgc->type);
     580            0 :           tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
     581              :                                    index_before_incr, nitems_step,
     582              :                                    vectype);
     583            0 :           gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
     584              :         }
     585              :       else
     586              :         {
     587            0 :           create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
     588              :                      &incr_gsi, insert_after, &index_before_incr,
     589              :                      &index_after_incr);
     590            0 :           gimple_seq_add_stmt (header_seq,
     591            0 :                                gimple_build_assign (step, MIN_EXPR,
     592              :                                                     index_before_incr,
     593              :                                                     nitems_step));
     594              :         }
     595            0 :       *iv_step = step;
     596            0 :       *compare_step = nitems_step;
     597            0 :       return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
     598            0 :                                                        : index_before_incr;
     599              :     }
     600              : 
     601              :   /* Create increment IV.  */
     602            0 :   create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
     603              :              loop, &incr_gsi, insert_after, &index_before_incr,
     604              :              &index_after_incr);
     605              : 
     606            0 :   tree zero_index = build_int_cst (compare_type, 0);
     607            0 :   tree test_index, test_limit, first_limit;
     608            0 :   gimple_stmt_iterator *test_gsi;
     609            0 :   if (might_wrap_p)
     610              :     {
     611              :       /* In principle the loop should stop iterating once the incremented
     612              :          IV reaches a value greater than or equal to:
     613              : 
     614              :            NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
     615              : 
     616              :          However, there's no guarantee that this addition doesn't overflow
     617              :          the comparison type, or that the IV hits a value above it before
     618              :          wrapping around.  We therefore adjust the limit down by one
     619              :          IV step:
     620              : 
     621              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     622              :            -[infinite-prec] NITEMS_STEP
     623              : 
     624              :          and compare the IV against this limit _before_ incrementing it.
     625              :          Since the comparison type is unsigned, we actually want the
     626              :          subtraction to saturate at zero:
     627              : 
     628              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     629              :            -[sat] NITEMS_STEP
     630              : 
     631              :          And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
     632              : 
     633              :            NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
     634              : 
     635              :          where the rightmost subtraction can be done directly in
     636              :          COMPARE_TYPE.  */
     637            0 :       test_index = index_before_incr;
     638            0 :       tree adjust = gimple_convert (preheader_seq, compare_type,
     639              :                                     nitems_step);
     640            0 :       if (nitems_skip)
     641            0 :         adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     642              :                                adjust, nitems_skip);
     643            0 :       test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
     644              :                                  nitems_total, adjust);
     645            0 :       test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     646              :                                  test_limit, adjust);
     647            0 :       test_gsi = &incr_gsi;
     648              : 
     649              :       /* Get a safe limit for the first iteration.  */
     650            0 :       if (nitems_skip)
     651              :         {
     652              :           /* The first vector iteration can handle at most NITEMS_STEP
     653              :              items.  NITEMS_STEP <= CONST_LIMIT, and adding
     654              :              NITEMS_SKIP to that cannot overflow.  */
     655            0 :           tree const_limit = build_int_cst (compare_type,
     656            0 :                                             LOOP_VINFO_VECT_FACTOR (loop_vinfo)
     657            0 :                                             * nitems_per_iter);
     658            0 :           first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
     659              :                                       nitems_total, const_limit);
     660            0 :           first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     661              :                                       first_limit, nitems_skip);
     662              :         }
     663              :       else
     664              :         /* For the first iteration it doesn't matter whether the IV hits
     665              :            a value above NITEMS_TOTAL.  That only matters for the latch
     666              :            condition.  */
     667              :         first_limit = nitems_total;
     668              :     }
     669              :   else
     670              :     {
     671              :       /* Test the incremented IV, which will always hit a value above
     672              :          the bound before wrapping.  */
     673            0 :       test_index = index_after_incr;
     674            0 :       test_limit = nitems_total;
     675            0 :       if (nitems_skip)
     676            0 :         test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     677              :                                    test_limit, nitems_skip);
     678              :       test_gsi = &loop_cond_gsi;
     679              : 
     680              :       first_limit = test_limit;
     681              :     }
     682              : 
     683              :   /* Convert the IV value to the comparison type (either a no-op or
     684              :      a demotion).  */
     685            0 :   gimple_seq test_seq = NULL;
     686            0 :   test_index = gimple_convert (&test_seq, compare_type, test_index);
     687            0 :   gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
     688              : 
     689              :   /* Provide a definition of each control in the group.  */
     690            0 :   tree next_ctrl = NULL_TREE;
     691            0 :   tree ctrl;
     692            0 :   unsigned int i;
     693            0 :   FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
     694              :     {
     695              :       /* Previous controls will cover BIAS items.  This control covers the
     696              :          next batch.  */
     697            0 :       poly_uint64 bias = nitems_per_ctrl * i;
     698            0 :       tree bias_tree = build_int_cst (compare_type, bias);
     699              : 
     700              :       /* See whether the first iteration of the vector loop is known
     701              :          to have a full control.  */
     702            0 :       poly_uint64 const_limit;
     703            0 :       bool first_iteration_full
     704            0 :         = (poly_int_tree_p (first_limit, &const_limit)
     705            0 :            && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
     706              : 
     707              :       /* Rather than have a new IV that starts at BIAS and goes up to
     708              :          TEST_LIMIT, prefer to use the same 0-based IV for each control
     709              :          and adjust the bound down by BIAS.  */
     710            0 :       tree this_test_limit = test_limit;
     711            0 :       if (i != 0)
     712              :         {
     713            0 :           this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
     714              :                                           compare_type, this_test_limit,
     715              :                                           bias_tree);
     716            0 :           this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
     717              :                                           compare_type, this_test_limit,
     718              :                                           bias_tree);
     719              :         }
     720              : 
     721              :       /* Create the initial control.  First include all items that
     722              :          are within the loop limit.  */
     723            0 :       tree init_ctrl = NULL_TREE;
     724            0 :       if (!first_iteration_full)
     725              :         {
     726            0 :           tree start, end;
     727            0 :           if (first_limit == test_limit)
     728              :             {
     729              :               /* Use a natural test between zero (the initial IV value)
     730              :                  and the loop limit.  The "else" block would be valid too,
     731              :                  but this choice can avoid the need to load BIAS_TREE into
     732              :                  a register.  */
     733              :               start = zero_index;
     734              :               end = this_test_limit;
     735              :             }
     736              :           else
     737              :             {
     738              :               /* FIRST_LIMIT is the maximum number of items handled by the
     739              :                  first iteration of the vector loop.  Test the portion
     740              :                  associated with this control.  */
     741            0 :               start = bias_tree;
     742            0 :               end = first_limit;
     743              :             }
     744              : 
     745            0 :           if (use_masks_p)
     746            0 :             init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
     747              :                                         start, end, "max_mask");
     748              :           else
     749              :             {
     750            0 :               init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
     751            0 :               gimple_seq seq = vect_gen_len (init_ctrl, start,
     752              :                                              end, length_limit);
     753            0 :               gimple_seq_add_seq (preheader_seq, seq);
     754              :             }
     755              :         }
     756              : 
     757              :       /* Now AND out the bits that are within the number of skipped
     758              :          items.  */
     759            0 :       poly_uint64 const_skip;
     760            0 :       if (nitems_skip
     761            0 :           && !(poly_int_tree_p (nitems_skip, &const_skip)
     762            0 :                && known_le (const_skip, bias)))
     763              :         {
     764            0 :           gcc_assert (use_masks_p);
     765            0 :           tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
     766              :                                                     bias_tree, nitems_skip);
     767            0 :           if (init_ctrl)
     768            0 :             init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
     769              :                                       init_ctrl, unskipped_mask);
     770              :           else
     771              :             init_ctrl = unskipped_mask;
     772              :         }
     773              : 
     774            0 :       if (!init_ctrl)
     775              :         {
     776              :           /* First iteration is full.  */
     777            0 :           if (use_masks_p)
     778            0 :             init_ctrl = build_minus_one_cst (ctrl_type);
     779              :           else
     780              :             init_ctrl = length_limit;
     781              :         }
     782              : 
     783              :       /* Get the control value for the next iteration of the loop.  */
     784            0 :       if (use_masks_p)
     785              :         {
     786            0 :           gimple_seq stmts = NULL;
     787            0 :           next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
     788              :                                       this_test_limit, "next_mask");
     789            0 :           gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
     790              :         }
     791              :       else
     792              :         {
     793            0 :           next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
     794            0 :           gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
     795              :                                          length_limit);
     796            0 :           gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
     797              :         }
     798              : 
     799            0 :       vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
     800              :     }
     801              : 
     802            0 :   int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
     803            0 :   if (partial_load_bias != 0)
     804              :     {
     805            0 :       tree adjusted_len = rgc->bias_adjusted_ctrl;
     806            0 :       gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
     807            0 :                                             rgc->controls[0],
     808              :                                             build_int_cst
     809            0 :                                             (TREE_TYPE (rgc->controls[0]),
     810            0 :                                              partial_load_bias));
     811            0 :       gimple_seq_add_stmt (header_seq, minus);
     812              :     }
     813              : 
     814              :   return next_ctrl;
     815              : }
     816              : 
     817              : /* Set up the iteration condition and rgroup controls for LOOP, given
     818              :    that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
     819              :    loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     820              :    the number of iterations of the original scalar loop that should be
     821              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     822              :    for vect_set_loop_condition.
     823              : 
     824              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     825              :    final gcond.  */
     826              : 
      827              : static gcond *
      828            0 : vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
      829              :                                          loop_vec_info loop_vinfo, tree niters,
      830              :                                          tree final_iv, bool niters_maybe_zero,
      831              :                                          gimple_stmt_iterator loop_cond_gsi)
      832              : {
                       :   /* Statements accumulated for the loop preheader (loop-invariant
                       :      setup) and for the start of the loop header (per-iteration
                       :      control computation); both are emitted together below.  */
      833            0 :   gimple_seq preheader_seq = NULL;
      834            0 :   gimple_seq header_seq = NULL;
      835              : 
      836            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
      837            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
      838            0 :   unsigned int compare_precision = TYPE_PRECISION (compare_type);
      839            0 :   tree orig_niters = niters;
      840              : 
      841              :   /* Type of the initial value of NITERS.  */
      842            0 :   tree ni_actual_type = TREE_TYPE (niters);
      843            0 :   unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
      844            0 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
      845            0 :   if (niters_skip)
      846            0 :     niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
      847              : 
      848              :   /* Convert NITERS to the same size as the compare.  */
      849            0 :   if (compare_precision > ni_actual_precision
      850            0 :       && niters_maybe_zero)
      851              :     {
      852              :       /* We know that there is always at least one iteration, so if the
      853              :          count is zero then it must have wrapped.  Cope with this by
      854              :          subtracting 1 before the conversion and adding 1 to the result.  */
      855            0 :       gcc_assert (TYPE_UNSIGNED (ni_actual_type));
      856            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
      857              :                              niters, build_minus_one_cst (ni_actual_type));
      858            0 :       niters = gimple_convert (&preheader_seq, compare_type, niters);
      859            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
      860              :                              niters, build_one_cst (compare_type));
      861              :     }
      862              :   else
      863            0 :     niters = gimple_convert (&preheader_seq, compare_type, niters);
      864              : 
      865              :   /* Iterate over all the rgroups and fill in their controls.  We could use
      866              :      the first control from any rgroup for the loop condition; here we
      867              :      arbitrarily pick the last.  */
      868            0 :   tree test_ctrl = NULL_TREE;
      869            0 :   tree iv_step = NULL_TREE;
      870            0 :   tree compare_step = NULL_TREE;
      871            0 :   rgroup_controls *rgc;
      872            0 :   rgroup_controls *iv_rgc = nullptr;
      873            0 :   unsigned int i;
      874            0 :   auto_vec<rgroup_controls> *controls = use_masks_p
      875            0 :                                           ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
      876              :                                           : &LOOP_VINFO_LENS (loop_vinfo);
      877            0 :   FOR_EACH_VEC_ELT (*controls, i, rgc)
      878            0 :     if (!rgc->controls.is_empty ())
      879              :       {
      880              :         /* First try using permutes.  This adds a single vector
      881              :            instruction to the loop for each mask, but needs no extra
      882              :            loop invariants or IVs.  */
      883            0 :         unsigned int nmasks = i + 1;
      884            0 :         if (use_masks_p && (nmasks & 1) == 0)
      885              :           {
      886            0 :             rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
      887            0 :             if (!half_rgc->controls.is_empty ()
      888            0 :                 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
      889            0 :               continue;
      890              :           }
      891              : 
                       :         /* With a decrementing IV, an rgroup that handles the same total
                       :            number of items per iteration as a previously-processed rgroup
                       :            (max_nscalars_per_iter * factor matches) can reuse that
                       :            rgroup's IV; otherwise create the controls and IV here.  */
      892            0 :         if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
      893            0 :             || !iv_rgc
      894            0 :             || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
      895            0 :                 != rgc->max_nscalars_per_iter * rgc->factor))
      896              :           {
      897              :             /* See whether zero-based IV would ever generate all-false masks
      898              :                or zero length before wrapping around.  */
      899            0 :             bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
      900              : 
      901              :             /* Set up all controls for this group.  */
      902            0 :             test_ctrl
      903            0 :               = vect_set_loop_controls_directly (loop, loop_vinfo,
      904              :                                                  &preheader_seq, &header_seq,
      905              :                                                  loop_cond_gsi, rgc, niters,
      906              :                                                  niters_skip, might_wrap_p,
      907              :                                                  &iv_step, &compare_step);
      908              : 
      909            0 :             iv_rgc = rgc;
      910              :           }
      911              : 
      912            0 :         if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
      913            0 :             && rgc->controls.length () > 1)
      914              :           {
      915              :             /* vect_set_loop_controls_directly creates an IV whose step
      916              :                is equal to the expected sum of RGC->controls.  Use that
      917              :                information to populate RGC->controls.  */
      918            0 :             tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
      919            0 :             gcc_assert (iv_step);
      920            0 :             vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
      921              :           }
      922              :       }
      923              : 
      924              :   /* Emit all accumulated statements.  */
      925            0 :   add_preheader_seq (loop, preheader_seq);
      926            0 :   add_header_seq (loop, header_seq);
      927              : 
      928              :   /* Get a boolean result that tells us whether to iterate.  */
      929            0 :   gcond *cond_stmt;
      930            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
      931            0 :       && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
      932              :     {
                       :       /* Decrementing IV without SELECT_VL: keep iterating while the
                       :          remaining item count exceeds one full step.  The comparison
                       :          code is chosen to match the sense of EXIT_EDGE.  */
      933            0 :       gcc_assert (compare_step);
      934            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
      935            0 :       cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
      936              :                                      NULL_TREE);
      937            0 :     }
      938              :   else
      939              :     {
                       :       /* Otherwise loop back while the test control is nonzero, i.e.
                       :          while the next iteration still has items to process.  */
      940            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
      941            0 :       tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
      942            0 :       cond_stmt
      943            0 :         = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
      944              :     }
      945            0 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
      946              : 
      947              :   /* The loop iterates (NITERS - 1) / VF + 1 times.
      948              :      Subtract one from this to get the latch count.  */
      949            0 :   tree step = build_int_cst (compare_type,
      950            0 :                              LOOP_VINFO_VECT_FACTOR (loop_vinfo));
      951            0 :   tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
      952              :                                        build_minus_one_cst (compare_type));
      953            0 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
      954              :                                      niters_minus_one, step);
      955              : 
      956            0 :   if (final_iv)
      957              :     {
      958            0 :       gassign *assign;
      959              :       /* If vectorizing an inverted early break loop we have to restart the
      960              :          scalar loop at niters - vf.  This matches what we do in
      961              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
      962            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
      963              :         {
      964            0 :           tree ftype = TREE_TYPE (orig_niters);
      965            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
      966            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
      967            0 :         }
      968              :        else
      969            0 :         assign = gimple_build_assign (final_iv, orig_niters);
      970            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
      971              :     }
      972              : 
      973            0 :   return cond_stmt;
      974              : }
     975              : 
     976              : /* Set up the iteration condition and rgroup controls for LOOP in AVX512
     977              :    style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
     978              :    vectorized loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     979              :    the number of iterations of the original scalar loop that should be
     980              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     981              :    for vect_set_loop_condition.
     982              : 
     983              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     984              :    final gcond.  */
     985              : 
     986              : static gcond *
     987           18 : vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
     988              :                                          edge exit_edge,
     989              :                                          loop_vec_info loop_vinfo, tree niters,
     990              :                                          tree final_iv,
     991              :                                          bool niters_maybe_zero,
     992              :                                          gimple_stmt_iterator loop_cond_gsi)
     993              : {
     994           18 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     995           18 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     996           18 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     997           18 :   tree orig_niters = niters;
     998           18 :   gimple_seq preheader_seq = NULL;
     999              : 
    1000              :   /* Create an IV that counts down from niters and whose step
    1001              :      is the number of iterations processed in the current iteration.
    1002              :      Produce the controls with compares like the following.
    1003              : 
    1004              :        # iv_2 = PHI <niters, iv_3>
    1005              :        rem_4 = MIN <iv_2, VF>;
    1006              :        remv_6 = { rem_4, rem_4, rem_4, ... }
    1007              :        mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6;
    1008              :        iv_3 = iv_2 - VF;
    1009              :        if (iv_2 > VF)
    1010              :          continue;
    1011              : 
    1012              :      Where the constant is built with elements at most VF - 1 and
    1013              :      repetitions according to max_nscalars_per_iter which is guarnateed
    1014              :      to be the same within a group.  */
    1015              : 
    1016              :   /* Convert NITERS to the determined IV type.  */
    1017           18 :   if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
    1018           18 :       && niters_maybe_zero)
    1019              :     {
    1020              :       /* We know that there is always at least one iteration, so if the
    1021              :          count is zero then it must have wrapped.  Cope with this by
    1022              :          subtracting 1 before the conversion and adding 1 to the result.  */
    1023            0 :       gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
    1024            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
    1025            0 :                              niters, build_minus_one_cst (TREE_TYPE (niters)));
    1026            0 :       niters = gimple_convert (&preheader_seq, iv_type, niters);
    1027            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
    1028              :                              niters, build_one_cst (iv_type));
    1029              :     }
    1030              :   else
    1031           18 :     niters = gimple_convert (&preheader_seq, iv_type, niters);
    1032              : 
    1033              :   /* Bias the initial value of the IV in case we need to skip iterations
    1034              :      at the beginning.  */
    1035           18 :   tree niters_adj = niters;
    1036           18 :   if (niters_skip)
    1037              :     {
    1038            1 :       tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
    1039            1 :       niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
    1040              :                                  iv_type, niters, skip);
    1041              :     }
    1042              : 
    1043              :   /* The iteration step is the vectorization factor.  */
    1044           18 :   tree iv_step = build_int_cst (iv_type, vf);
    1045              : 
    1046              :   /* Create the decrement IV.  */
    1047           18 :   tree index_before_incr, index_after_incr;
    1048           18 :   gimple_stmt_iterator incr_gsi;
    1049           18 :   bool insert_after;
    1050           18 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1051           18 :   create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
    1052              :              &incr_gsi, insert_after, &index_before_incr,
    1053              :              &index_after_incr);
    1054              : 
    1055              :   /* Iterate over all the rgroups and fill in their controls.  */
    1056           80 :   for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
    1057              :     {
    1058           26 :       if (rgc.controls.is_empty ())
    1059            6 :         continue;
    1060              : 
    1061           20 :       tree ctrl_type = rgc.type;
    1062           20 :       poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
    1063              : 
    1064           20 :       tree vectype = rgc.compare_type;
    1065              : 
    1066              :       /* index_after_incr is the IV specifying the remaining iterations in
    1067              :          the next iteration.  */
    1068           20 :       tree rem = index_after_incr;
    1069              :       /* When the data type for the compare to produce the mask is
    1070              :          smaller than the IV type we need to saturate.  Saturate to
    1071              :          the smallest possible value (IV_TYPE) so we only have to
    1072              :          saturate once (CSE will catch redundant ones we add).  */
    1073           20 :       if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
    1074            9 :         rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1075              :                             UNKNOWN_LOCATION,
    1076            9 :                             MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
    1077           20 :       rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1078           20 :                             UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
    1079              : 
    1080              :       /* Build a data vector composed of the remaining iterations.  */
    1081           20 :       rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1082              :                                           UNKNOWN_LOCATION, vectype, rem);
    1083              : 
    1084              :       /* Provide a definition of each vector in the control group.  */
    1085           20 :       tree next_ctrl = NULL_TREE;
    1086           20 :       tree first_rem = NULL_TREE;
    1087           20 :       tree ctrl;
    1088           20 :       unsigned int i;
    1089          238 :       FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
    1090              :         {
    1091              :           /* Previous controls will cover BIAS items.  This control covers the
    1092              :              next batch.  */
    1093           86 :           poly_uint64 bias = nitems_per_ctrl * i;
    1094              : 
    1095              :           /* Build the constant to compare the remaining iters against,
    1096              :              this is sth like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
    1097              :              split into pieces.  */
    1098           86 :           unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
    1099           86 :           tree_vector_builder builder (vectype, n, 1);
    1100         1266 :           for (unsigned i = 0; i < n; ++i)
    1101              :             {
    1102         1180 :               unsigned HOST_WIDE_INT val
    1103         1180 :                 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
    1104         1180 :               gcc_assert (val < vf.to_constant ());
    1105         1180 :               builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
    1106              :             }
    1107           86 :           tree cmp_series = builder.build ();
    1108              : 
    1109              :           /* Create the initial control.  First include all items that
    1110              :              are within the loop limit.  */
    1111           86 :           tree init_ctrl = NULL_TREE;
    1112           86 :           poly_uint64 const_limit;
    1113              :           /* See whether the first iteration of the vector loop is known
    1114              :              to have a full control.  */
    1115           86 :           if (poly_int_tree_p (niters, &const_limit)
    1116           86 :               && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
    1117            1 :             init_ctrl = build_minus_one_cst (ctrl_type);
    1118              :           else
    1119              :             {
    1120              :               /* The remaining work items initially are niters.  Saturate,
    1121              :                  splat and compare.  */
    1122           85 :               if (!first_rem)
    1123              :                 {
    1124           19 :                   first_rem = niters;
    1125           19 :                   if (TYPE_PRECISION (TREE_TYPE (vectype))
    1126           19 :                       < TYPE_PRECISION (iv_type))
    1127            9 :                     first_rem = gimple_build (&preheader_seq,
    1128            9 :                                               MIN_EXPR, TREE_TYPE (first_rem),
    1129              :                                               first_rem, iv_step);
    1130           19 :                   first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
    1131              :                                               first_rem);
    1132           19 :                   first_rem = gimple_build_vector_from_val (&preheader_seq,
    1133              :                                                             vectype, first_rem);
    1134              :                 }
    1135           85 :               init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
    1136              :                                         cmp_series, first_rem);
    1137              :             }
    1138              : 
    1139              :           /* Now AND out the bits that are within the number of skipped
    1140              :              items.  */
    1141           86 :           poly_uint64 const_skip;
    1142           86 :           if (niters_skip
    1143           86 :               && !(poly_int_tree_p (niters_skip, &const_skip)
    1144            1 :                    && known_le (const_skip, bias)))
    1145              :             {
    1146              :               /* For integer mode masks it's cheaper to shift out the bits
    1147              :                  since that avoids loading a constant.  */
    1148            1 :               gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
    1149            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1150            1 :                                         lang_hooks.types.type_for_mode
    1151            1 :                                           (TYPE_MODE (ctrl_type), 1),
    1152              :                                         init_ctrl);
    1153              :               /* ???  But when the shift amount isn't constant this requires
     1154              :                  a round-trip to GPRs.  We could apply the bias to either
    1155              :                  side of the compare instead.  */
    1156            2 :               tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
    1157            1 :                                          TREE_TYPE (niters_skip), niters_skip,
    1158            1 :                                          build_int_cst (TREE_TYPE (niters_skip),
    1159            1 :                                                         bias));
    1160            2 :               shift = gimple_build (&preheader_seq, MULT_EXPR,
    1161            1 :                                     TREE_TYPE (niters_skip), shift,
    1162            1 :                                     build_int_cst (TREE_TYPE (niters_skip),
    1163            1 :                                                    rgc.max_nscalars_per_iter));
    1164            1 :               init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
    1165            1 :                                         TREE_TYPE (init_ctrl),
    1166              :                                         init_ctrl, shift);
    1167            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1168              :                                         ctrl_type, init_ctrl);
    1169              :             }
    1170              : 
    1171              :           /* Get the control value for the next iteration of the loop.  */
    1172           86 :           next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1173              :                                     UNKNOWN_LOCATION,
    1174              :                                     LT_EXPR, ctrl_type, cmp_series, rem);
    1175              : 
    1176           86 :           vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
    1177           86 :         }
    1178              :     }
    1179              : 
    1180              :   /* Emit all accumulated statements.  */
    1181           18 :   add_preheader_seq (loop, preheader_seq);
    1182              : 
    1183              :   /* Adjust the exit test using the decrementing IV.  */
    1184           18 :   tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
    1185              :   /* When we peel for alignment with niter_skip != 0 this can
    1186              :      cause niter + niter_skip to wrap and since we are comparing the
    1187              :      value before the decrement here we get a false early exit.
    1188              :      We can't compare the value after decrement either because that
    1189              :      decrement could wrap as well as we're not doing a saturating
    1190              :      decrement.  To avoid this situation we force a larger
    1191              :      iv_type.  */
    1192           18 :   gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
    1193              :                                         NULL_TREE, NULL_TREE);
    1194           18 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1195              : 
    1196              :   /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
    1197              :      Subtract one from this to get the latch count.  */
    1198           18 :   tree niters_minus_one
    1199           18 :     = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
    1200              :                    build_minus_one_cst (TREE_TYPE (orig_niters)));
    1201           18 :   tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
    1202           18 :   if (niters_skip)
    1203            1 :     niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
    1204              :                                fold_convert (iv_type, niters_skip));
    1205           18 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
    1206              :                                      niters_adj2, iv_step);
    1207              : 
    1208           18 :   if (final_iv)
    1209              :     {
    1210            0 :       gassign *assign;
    1211              :       /* If vectorizing an inverted early break loop we have to restart the
    1212              :          scalar loop at niters - vf.  This matches what we do in
    1213              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
    1214            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    1215              :         {
    1216            0 :           tree ftype = TREE_TYPE (orig_niters);
    1217            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    1218            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
    1219              :         }
    1220              :        else
    1221            0 :         assign = gimple_build_assign (final_iv, orig_niters);
    1222            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
    1223              :     }
    1224              : 
    1225           18 :   return cond_stmt;
    1226              : }
    1227              : 
    1228              : 
    1229              : /* Like vect_set_loop_condition, but handle the case in which the vector
    1230              :    loop handles exactly VF scalars per iteration.  */
    1231              : 
    1232              : static gcond *
    1233        61795 : vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
    1234              :                                 class loop *loop, tree niters, tree step,
    1235              :                                 tree final_iv, bool niters_maybe_zero,
    1236              :                                 gimple_stmt_iterator loop_cond_gsi)
    1237              : {
    1238        61795 :   tree indx_before_incr, indx_after_incr;
    1239        61795 :   gcond *cond_stmt;
    1240        61795 :   gcond *orig_cond;
    1241        61795 :   edge pe = loop_preheader_edge (loop);
    1242        61795 :   gimple_stmt_iterator incr_gsi;
    1243        61795 :   bool insert_after;
    1244        61795 :   enum tree_code code;
    1245        61795 :   tree niters_type = TREE_TYPE (niters);
    1246              : 
    1247        61795 :   orig_cond = get_loop_exit_condition (exit_edge);
    1248        61795 :   gcc_assert (orig_cond);
    1249        61795 :   loop_cond_gsi = gsi_for_stmt (orig_cond);
    1250              : 
    1251        61795 :   tree init, limit;
    1252        61795 :   if (!niters_maybe_zero && integer_onep (step))
    1253              :     {
    1254              :       /* In this case we can use a simple 0-based IV:
    1255              : 
    1256              :          A:
    1257              :            x = 0;
    1258              :            do
    1259              :              {
    1260              :                ...
    1261              :                x += 1;
    1262              :              }
    1263              :            while (x < NITERS);  */
    1264        61795 :       code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1265        61795 :       init = build_zero_cst (niters_type);
    1266        61795 :       limit = niters;
    1267              :     }
    1268              :   else
    1269              :     {
    1270              :       /* The following works for all values of NITERS except 0:
    1271              : 
    1272              :          B:
    1273              :            x = 0;
    1274              :            do
    1275              :              {
    1276              :                ...
    1277              :                x += STEP;
    1278              :              }
    1279              :            while (x <= NITERS - STEP);
    1280              : 
    1281              :          so that the loop continues to iterate if x + STEP - 1 < NITERS
    1282              :          but stops if x + STEP - 1 >= NITERS.
    1283              : 
    1284              :          However, if NITERS is zero, x never hits a value above NITERS - STEP
    1285              :          before wrapping around.  There are two obvious ways of dealing with
    1286              :          this:
    1287              : 
    1288              :          - start at STEP - 1 and compare x before incrementing it
    1289              :          - start at -1 and compare x after incrementing it
    1290              : 
    1291              :          The latter is simpler and is what we use.  The loop in this case
    1292              :          looks like:
    1293              : 
    1294              :          C:
    1295              :            x = -1;
    1296              :            do
    1297              :              {
    1298              :                ...
    1299              :                x += STEP;
    1300              :              }
    1301              :            while (x < NITERS - STEP);
    1302              : 
    1303              :          In both cases the loop limit is NITERS - STEP.  */
    1304            0 :       gimple_seq seq = NULL;
    1305            0 :       limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
    1306            0 :       limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
    1307            0 :       if (seq)
    1308              :         {
    1309            0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
    1310            0 :           gcc_assert (!new_bb);
    1311              :         }
    1312            0 :       if (niters_maybe_zero)
    1313              :         {
    1314              :           /* Case C.  */
    1315            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1316            0 :           init = build_all_ones_cst (niters_type);
    1317              :         }
    1318              :       else
    1319              :         {
    1320              :           /* Case B.  */
    1321            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
    1322            0 :           init = build_zero_cst (niters_type);
    1323              :         }
    1324              :     }
    1325              : 
    1326        61795 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1327        61795 :   create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
    1328              :              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
    1329        61795 :   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
    1330              :                                               true, NULL_TREE, true,
    1331              :                                               GSI_SAME_STMT);
    1332        61795 :   limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
    1333              :                                      true, GSI_SAME_STMT);
    1334              : 
    1335        61795 :   cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
    1336              :                                  NULL_TREE);
    1337              : 
    1338        61795 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1339              : 
    1340              :   /* Record the number of latch iterations.  */
    1341        61795 :   if (limit == niters)
    1342              :     /* Case A: the loop iterates NITERS times.  Subtract one to get the
    1343              :        latch count.  */
    1344        61795 :     loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
    1345              :                                        build_int_cst (niters_type, 1));
    1346              :   else
    1347              :     /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
    1348              :        Subtract one from this to get the latch count.  */
    1349            0 :     loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
    1350              :                                        limit, step);
    1351              : 
    1352        61795 :   if (final_iv)
    1353              :     {
    1354            0 :       gassign *assign;
    1355            0 :       gcc_assert (single_pred_p (exit_edge->dest));
    1356            0 :       tree phi_dest
    1357            0 :         = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
    1358              :       /* Make sure to maintain LC SSA form here and elide the subtraction
    1359              :          if the value is zero.  */
    1360            0 :       gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
    1361            0 :       add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
    1362            0 :       if (!integer_zerop (init))
    1363              :         {
    1364            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR,
    1365              :                                         phi_dest, init);
    1366            0 :           gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
    1367            0 :           gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
    1368              :         }
    1369              :     }
    1370              : 
    1371        61795 :   return cond_stmt;
    1372              : }
    1373              : 
    1374              : /* If we're using fully-masked loops, make LOOP iterate:
    1375              : 
    1376              :       N == (NITERS - 1) / STEP + 1
    1377              : 
    1378              :    times.  When NITERS is zero, this is equivalent to making the loop
    1379              :    execute (1 << M) / STEP times, where M is the precision of NITERS.
    1380              :    NITERS_MAYBE_ZERO is true if this last case might occur.
    1381              : 
    1382              :    If we're not using fully-masked loops, make LOOP iterate:
    1383              : 
    1384              :       N == (NITERS - STEP) / STEP + 1
    1385              : 
    1386              :    times, where NITERS is known to be outside the range [1, STEP - 1].
    1387              :    This is equivalent to making the loop execute NITERS / STEP times
    1388              :    when NITERS is nonzero and (1 << M) / STEP times otherwise.
    1389              :    NITERS_MAYBE_ZERO again indicates whether this last case might occur.
    1390              : 
    1391              :    If FINAL_IV is nonnull, it is an SSA name that should be set to
    1392              :    N * STEP on exit from the loop.
    1393              : 
    1394              :    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
    1395              : 
    1396              : void
    1397        61813 : vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
    1398              :                          tree niters, tree step, tree final_iv,
    1399              :                          bool niters_maybe_zero)
    1400              : {
    1401        61813 :   gcond *cond_stmt;
    1402        61813 :   gcond *orig_cond = get_loop_exit_condition (loop_e);
    1403        61813 :   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
    1404              : 
    1405        61813 :   if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    1406              :     {
    1407           18 :       if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
    1408           18 :         cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
    1409              :                                                                     loop_vinfo,
    1410              :                                                                     niters, final_iv,
    1411              :                                                                     niters_maybe_zero,
    1412              :                                                                     loop_cond_gsi);
    1413              :       else
    1414            0 :         cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
    1415              :                                                              loop_vinfo,
    1416              :                                                              niters, final_iv,
    1417              :                                                              niters_maybe_zero,
    1418              :                                                              loop_cond_gsi);
    1419              :     }
    1420              :   else
    1421        61795 :     cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
    1422              :                                                 niters,
    1423              :                                                 step, final_iv,
    1424              :                                                 niters_maybe_zero,
    1425              :                                                 loop_cond_gsi);
    1426              : 
    1427              :   /* Remove old loop exit test.  */
    1428        61813 :   stmt_vec_info orig_cond_info;
    1429        61813 :   if (loop_vinfo
    1430        61813 :       && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
    1431        61375 :     loop_vinfo->remove_stmt (orig_cond_info);
    1432              :   else
    1433          438 :     gsi_remove (&loop_cond_gsi, true);
    1434              : 
    1435        61813 :   if (dump_enabled_p ())
    1436        11177 :     dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
    1437              :                      (gimple *) cond_stmt);
    1438        61813 : }
    1439              : 
    1440              : /* Get the virtual operand live on E.  The precondition on this is valid
    1441              :    immediate dominators and an actual virtual definition dominating E.  */
    1442              : /* ???  Costly band-aid.  For the use in question we can populate a
    1443              :    live-on-exit/end-of-BB virtual operand when copying stmts.  */
    1444              : 
    1445              : static tree
    1446           10 : get_live_virtual_operand_on_edge (edge e)
    1447              : {
    1448           10 :   basic_block bb = e->src;
    1449           22 :   do
    1450              :     {
    1451           61 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    1452              :         {
    1453           37 :           gimple *stmt = gsi_stmt (gsi);
    1454           55 :           if (gimple_vdef (stmt))
    1455           10 :             return gimple_vdef (stmt);
    1456           39 :           if (gimple_vuse (stmt))
    1457              :             return gimple_vuse (stmt);
    1458              :         }
    1459            8 :       if (gphi *vphi = get_virtual_phi (bb))
    1460            2 :         return gimple_phi_result (vphi);
    1461            6 :       bb = get_immediate_dominator (CDI_DOMINATORS, bb);
    1462            6 :     }
    1463              :   while (1);
    1464              : }
    1465              : 
    1466              : /* Given LOOP this function generates a new copy of it and puts it
    1467              :    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
    1468              :    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
    1469              :    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
    1470              :    entry or exit of LOOP.  If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
    1471              :    continuation.  This is correct for cases where one loop continues from the
    1472              :    other like in the vectorizer, but not true for uses in e.g. loop distribution
    1473              :    where the contents of the loop body are split but the iteration space of both
    1474              :    copies remains the same.
    1475              : 
     1476              :    If UPDATED_DOMS is not NULL it is updated with the list of basic blocks whose
    1477              :    dominators were updated during the peeling.  When doing early break vectorization
    1478              :    then LOOP_VINFO needs to be provided and is used to keep track of any newly created
    1479              :    memory references that need to be updated should we decide to vectorize.  */
    1480              : 
    1481              : class loop *
    1482        34915 : slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
    1483              :                                         class loop *scalar_loop,
    1484              :                                         edge scalar_exit, edge e, edge *new_e,
    1485              :                                         bool flow_loops,
    1486              :                                         vec<basic_block> *updated_doms,
    1487              :                                         bool uncounted_p, bool create_main_e)
    1488              : {
    1489        34915 :   class loop *new_loop;
    1490        34915 :   basic_block *new_bbs, *bbs, *pbbs;
    1491        34915 :   bool at_exit;
    1492        34915 :   bool was_imm_dom;
    1493        34915 :   basic_block exit_dest;
    1494        34915 :   edge exit, new_exit;
    1495        34915 :   bool duplicate_outer_loop = false;
    1496              : 
    1497        34915 :   exit = loop_exit;
    1498        34915 :   at_exit = (e == exit);
    1499        34915 :   if (!at_exit && e != loop_preheader_edge (loop))
    1500              :     return NULL;
    1501              : 
    1502        34915 :   if (scalar_loop == NULL)
    1503              :     {
    1504        32394 :       scalar_loop = loop;
    1505        32394 :       scalar_exit = loop_exit;
    1506              :     }
    1507         2521 :   else if (scalar_loop == loop)
    1508            0 :     scalar_exit = loop_exit;
    1509              :   else
    1510              :     {
    1511              :       /* Loop has been version, match exits up using the aux index.  */
    1512         7563 :       for (edge exit : get_loop_exit_edges (scalar_loop))
    1513         2521 :         if (exit->aux == loop_exit->aux)
    1514              :           {
    1515         2521 :             scalar_exit = exit;
    1516         2521 :             break;
    1517         2521 :           }
    1518              : 
    1519         2521 :       gcc_assert (scalar_exit);
    1520              :     }
    1521              : 
    1522        34915 :   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1523        34915 :   pbbs = bbs + 1;
    1524        34915 :   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
    1525              :   /* Allow duplication of outer loops.  */
    1526        34915 :   if (scalar_loop->inner)
    1527          124 :     duplicate_outer_loop = true;
    1528              : 
    1529              :   /* Generate new loop structure.  */
    1530        34915 :   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
    1531        34915 :   duplicate_subloops (scalar_loop, new_loop);
    1532              : 
    1533        34915 :   exit_dest = exit->dest;
    1534        34915 :   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
    1535        34915 :                                           exit_dest) == exit->src ?
    1536              :                  true : false);
    1537              : 
    1538              :   /* Also copy the pre-header, this avoids jumping through hoops to
    1539              :      duplicate the loop entry PHI arguments.  Create an empty
    1540              :      pre-header unconditionally for this.  */
    1541        34915 :   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
    1542        34915 :   edge entry_e = single_pred_edge (preheader);
    1543        34915 :   bbs[0] = preheader;
    1544        34915 :   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1545              : 
    1546        34915 :   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
    1547              :             &scalar_exit, 1, &new_exit, NULL,
    1548              :             at_exit ? loop->latch : e->src, true);
    1549        34915 :   exit = loop_exit;
    1550        34915 :   basic_block new_preheader = new_bbs[0];
    1551              : 
    1552        34915 :   gcc_assert (new_exit);
    1553              : 
    1554              :   /* Record the new loop exit information.  new_loop doesn't have SCEV data and
    1555              :      so we must initialize the exit information.  */
    1556        34915 :   if (new_e)
    1557        33780 :     *new_e = new_exit;
    1558              : 
    1559              :   /* Before installing PHI arguments make sure that the edges
    1560              :      into them match that of the scalar loop we analyzed.  This
    1561              :      makes sure the SLP tree matches up between the main vectorized
    1562              :      loop and the epilogue vectorized copies.  */
    1563        34915 :   if (single_succ_edge (preheader)->dest_idx
    1564        34915 :       != single_succ_edge (new_bbs[0])->dest_idx)
    1565              :     {
    1566        29916 :       basic_block swap_bb = new_bbs[1];
    1567        29916 :       gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1568        29916 :       std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1569        29916 :       EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1570        29916 :       EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1571              :     }
    1572        34915 :   if (duplicate_outer_loop)
    1573              :     {
    1574          124 :       class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
    1575          124 :       if (loop_preheader_edge (scalar_loop)->dest_idx
    1576          124 :           != loop_preheader_edge (new_inner_loop)->dest_idx)
    1577              :         {
    1578           99 :           basic_block swap_bb = new_inner_loop->header;
    1579           99 :           gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1580           99 :           std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1581           99 :           EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1582           99 :           EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1583              :         }
    1584              :     }
    1585              : 
    1586        34915 :   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
    1587              : 
    1588              :   /* Skip new preheader since it's deleted if copy loop is added at entry.  */
    1589       182320 :   for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    1590       112490 :     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
    1591              : 
    1592              :   /* Rename the exit uses.  */
    1593       141431 :   for (edge exit : get_loop_exit_edges (new_loop))
    1594        36686 :     for (auto gsi = gsi_start_phis (exit->dest);
    1595        85318 :          !gsi_end_p (gsi); gsi_next (&gsi))
    1596              :       {
    1597        48632 :         tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
    1598        48632 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
    1599        48632 :         if (MAY_HAVE_DEBUG_BIND_STMTS)
    1600        23200 :           adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
    1601        34915 :       }
    1602              : 
    1603        34915 :   auto loop_exits = get_loop_exit_edges (loop);
    1604        34915 :   bool multiple_exits_p = loop_exits.length () > 1;
    1605        34915 :   auto_vec<basic_block> doms;
    1606              : 
    1607        34915 :   if (at_exit) /* Add the loop copy at exit.  */
    1608              :     {
    1609        33342 :       if (scalar_loop != loop && new_exit->dest != exit_dest)
    1610              :         {
    1611         2517 :           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
    1612         2517 :           flush_pending_stmts (new_exit);
    1613              :         }
    1614              : 
    1615        33342 :       bool need_virtual_phi = get_virtual_phi (loop->header);
    1616              : 
    1617              :       /* For the main loop exit preserve the LC PHI nodes.  For vectorization
    1618              :          we need them to continue or finalize reductions.  Since we do not
    1619              :          copy the loop exit blocks we have to materialize PHIs at the
    1620              :          new destination before redirecting edges.  */
    1621        33342 :       for (auto gsi_from = gsi_start_phis (loop_exit->dest);
    1622        78962 :            !gsi_end_p (gsi_from); gsi_next (&gsi_from))
    1623              :         {
    1624        45620 :           tree res = gimple_phi_result (*gsi_from);
    1625        45620 :           create_phi_node (copy_ssa_name (res), new_preheader);
    1626              :         }
    1627        33342 :       edge e = redirect_edge_and_branch (loop_exit, new_preheader);
    1628        33342 :       gcc_assert (e == loop_exit);
    1629        33342 :       flush_pending_stmts (loop_exit);
    1630        33342 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
    1631              : 
    1632              :       /* If we ended up choosing an exit leading to a path not using memory
    1633              :          we can end up without a virtual LC PHI.  Create it when it is
    1634              :          needed because of the epilog loop continuation.  */
    1635        33342 :       if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
    1636              :         {
    1637            8 :           tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
    1638            8 :           gphi *vphi = create_phi_node (copy_ssa_name (header_def),
    1639              :                                         new_preheader);
    1640            8 :           add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
    1641              :                        loop_exit, UNKNOWN_LOCATION);
    1642              :         }
    1643              : 
    1644        33342 :       bool multiple_exits_p = loop_exits.length () > 1;
    1645        33342 :       basic_block main_loop_exit_block = new_preheader;
    1646        33342 :       basic_block alt_loop_exit_block = NULL;
    1647              :       /* Create the CFG for multiple exits.
    1648              :                    | loop_exit               | alt1   | altN
    1649              :                    v                         v   ...  v
    1650              :             main_loop_exit_block:       alt_loop_exit_block:
    1651              :                    |                      /
    1652              :                    v                     v
    1653              :             new_preheader:
    1654              :          where in the new preheader we need merge PHIs for
    1655              :          the continuation values into the epilogue header.
    1656              :          Do not bother with exit PHIs for the early exits but
    1657              :          their live virtual operand.  We'll fix up things below.  */
    1658        33342 :       if (multiple_exits_p || uncounted_p)
    1659              :         {
    1660         1406 :           edge loop_e = single_succ_edge (new_preheader);
    1661         1406 :           new_preheader = split_edge (loop_e);
    1662              : 
    1663         1406 :           gphi *vphi = NULL;
    1664         1406 :           alt_loop_exit_block = new_preheader;
    1665         7170 :           for (auto exit : loop_exits)
    1666         2952 :             if (exit != loop_exit)
    1667              :               {
    1668         1546 :                 tree vphi_def = NULL_TREE;
    1669         1546 :                 if (gphi *evphi = get_virtual_phi (exit->dest))
    1670          602 :                   vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
    1671         1546 :                 edge res = redirect_edge_and_branch (exit, alt_loop_exit_block);
    1672         1546 :                 gcc_assert (res == exit);
    1673         1546 :                 redirect_edge_var_map_clear (exit);
    1674         1546 :                 if (alt_loop_exit_block == new_preheader)
    1675         1377 :                   alt_loop_exit_block = split_edge (exit);
    1676         1546 :                 if (!need_virtual_phi)
    1677         1102 :                   continue;
    1678              :                 /* When the edge has no virtual LC PHI get at the live
    1679              :                    virtual operand by other means.  */
    1680          444 :                 if (!vphi_def)
    1681            2 :                   vphi_def = get_live_virtual_operand_on_edge (exit);
    1682          444 :                 if (!vphi)
    1683          410 :                   vphi = create_phi_node (copy_ssa_name (vphi_def),
    1684              :                                           alt_loop_exit_block);
    1685              :                 else
    1686              :                   /* Edge redirection might re-allocate the PHI node
    1687              :                      so we have to rediscover it.  */
    1688           34 :                   vphi = get_virtual_phi (alt_loop_exit_block);
    1689          444 :                 add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
    1690              :               }
    1691              : 
    1692         1406 :           set_immediate_dominator (CDI_DOMINATORS, new_preheader,
    1693              :                                    loop->header);
    1694              : 
    1695              :           /* Fix up the profile counts of the new exit blocks.
    1696              :              main_loop_exit_block was created by duplicating the
    1697              :              preheader, so needs its count scaling according to the main
    1698              :              exit edge's probability.  The remaining count from the
    1699              :              preheader goes to the alt_loop_exit_block, since all
    1700              :              alternative exits have been redirected there.  */
    1701         1406 :           main_loop_exit_block->count = loop_exit->count ();
    1702         1406 :           alt_loop_exit_block->count
    1703         1406 :             = preheader->count - main_loop_exit_block->count;
    1704              :         }
    1705              : 
    1706              :       /* Adjust the epilog loop PHI entry values to continue iteration.
    1707              :          This adds remaining necessary LC PHI nodes to the main exit
    1708              :          and creates merge PHIs when we have multiple exits with
    1709              :          their appropriate continuation.  */
    1710        33342 :       if (flow_loops)
    1711              :         {
    1712        33342 :           edge loop_entry = single_succ_edge (new_preheader);
    1713        33342 :           bool peeled_iters = (uncounted_p
    1714        33342 :                                || single_pred (loop->latch) != loop_exit->src);
    1715              : 
    1716              :           /* Record the new SSA names in the cache so that we can skip
    1717              :              materializing them again when we fill in the rest of the LC SSA
    1718              :              variables.  */
    1719        33342 :           hash_map <tree, tree> new_phi_args;
    1720        33342 :           for (auto psi = gsi_start_phis (main_loop_exit_block);
    1721        78970 :                !gsi_end_p (psi); gsi_next (&psi))
    1722              :             {
    1723        45628 :               gphi *phi = *psi;
    1724        45628 :               tree new_arg = gimple_phi_arg_def_from_edge (phi, loop_exit);
    1725        45628 :               if (TREE_CODE (new_arg) != SSA_NAME)
    1726          384 :                 continue;
    1727              : 
    1728              :               /* If the loop doesn't have a virtual def then only possibly keep
    1729              :                  the epilog LC PHI for it and avoid creating new defs.  */
    1730        45338 :               if (virtual_operand_p (new_arg) && !need_virtual_phi)
    1731              :                 {
    1732           94 :                   auto gsi = gsi_for_stmt (phi);
    1733           94 :                   remove_phi_node (&gsi, true);
    1734           94 :                   continue;
    1735           94 :                 }
    1736              : 
    1737              :               /* If we decided not to remove the PHI node we should also not
    1738              :                  rematerialize it later on.  */
    1739        45244 :               new_phi_args.put (new_arg, gimple_phi_result (phi));
    1740              :             }
    1741              : 
    1742              :           /* Create the merge PHI nodes in new_preheader and populate the
    1743              :              arguments for the exits.  */
    1744        33342 :           if (multiple_exits_p || uncounted_p)
    1745              :             {
    1746         1406 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1747         1406 :                    gsi_to = gsi_start_phis (new_loop->header);
    1748         4454 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1749         3048 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1750              :                 {
    1751         3048 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1752         3048 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1753              : 
    1754              :                   /* When the vector loop is peeled then we need to use the
    1755              :                      value at start of the loop, otherwise the main loop exit
    1756              :                      should use the final iter value.  */
    1757         3048 :                   tree new_arg;
    1758         3048 :                   if (peeled_iters)
    1759          123 :                     new_arg = gimple_phi_result (from_phi);
    1760              :                   else
    1761         2925 :                     new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1762              :                                                      loop_latch_edge (loop));
    1763              : 
    1764              :                   /* Check if we've already created a new phi node during edge
    1765              :                      redirection and re-use it if so.  Otherwise create a
    1766              :                      LC PHI node to feed the merge PHI.  */
    1767         3048 :                   tree *res;
    1768         6096 :                   if (virtual_operand_p (new_arg))
    1769              :                     {
    1770              :                       /* Use the existing virtual LC SSA from exit block.  */
    1771          422 :                       gphi *vphi = get_virtual_phi (main_loop_exit_block);
    1772          422 :                       new_arg = gimple_phi_result (vphi);
    1773              :                     }
    1774         2626 :                   else if ((res = new_phi_args.get (new_arg)))
    1775          105 :                     new_arg = *res;
    1776              :                   else
    1777              :                     {
    1778              :                       /* Create the LC PHI node for the exit.  */
    1779         2521 :                       tree new_def = copy_ssa_name (new_arg);
    1780         2521 :                       gphi *lc_phi
    1781         2521 :                           = create_phi_node (new_def, main_loop_exit_block);
    1782         2521 :                       SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
    1783         2521 :                       new_arg = new_def;
    1784              :                     }
    1785              : 
    1786              :                   /* Create the PHI node in the merge block merging the
    1787              :                      main and early exit values.  */
    1788         3048 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1789         3048 :                   gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
    1790         3048 :                   edge main_e = single_succ_edge (main_loop_exit_block);
    1791         3048 :                   SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg);
    1792              : 
    1793              :                   /* And adjust the epilog entry value.  */
    1794         3048 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1795              :                 }
    1796              :             }
    1797              : 
    1798        33342 :           if (multiple_exits_p)
    1799              :             {
    1800              :               /* After creating the merge PHIs handle the early exits those
    1801              :                  should use the values at the start of the loop.  */
    1802         1377 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1803         1377 :                    gsi_to = gsi_start_phis (new_preheader);
    1804         4373 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1805         2996 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1806              :                 {
    1807         2996 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1808         2996 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1809              : 
    1810              :                   /* Now update the virtual PHI nodes with the right value.  */
    1811         2996 :                   tree alt_arg = gimple_phi_result (from_phi);
    1812         5992 :                   if (virtual_operand_p (alt_arg))
    1813              :                     {
    1814          410 :                       gphi *vphi = get_virtual_phi (alt_loop_exit_block);
    1815          410 :                       alt_arg = gimple_phi_result (vphi);
    1816              :                     }
    1817              :                   /* For other live args we didn't create LC PHI nodes.
    1818              :                      Do so here.  */
    1819              :                   else
    1820              :                     {
    1821         2586 :                       tree alt_def = copy_ssa_name (alt_arg);
    1822         2586 :                       gphi *lc_phi
    1823         2586 :                         = create_phi_node (alt_def, alt_loop_exit_block);
    1824         5509 :                       for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
    1825              :                            ++i)
    1826         2923 :                         SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
    1827              :                       alt_arg = alt_def;
    1828              :                     }
    1829         2996 :                   edge alt_e = single_succ_edge (alt_loop_exit_block);
    1830         2996 :                   SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
    1831              :                 }
    1832              :             }
    1833              :           /* For the single exit case only create the missing LC PHI nodes
    1834              :              for the continuation of the loop IVs that are not also already
    1835              :              reductions and thus had LC PHI nodes on the exit already.  */
    1836        33342 :           if (!multiple_exits_p && !uncounted_p)
    1837              :             {
    1838        31936 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1839        31936 :                    gsi_to = gsi_start_phis (new_loop->header);
    1840       122211 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1841        90275 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1842              :                 {
    1843        90275 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1844        90275 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1845        90275 :                   tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1846              :                                                         loop_latch_edge (loop));
    1847              : 
    1848              :                   /* Check if we've already created a new phi node during edge
    1849              :                      redirection.  If we have, only propagate the value
    1850              :                      downwards.  */
    1851        90275 :                   if (tree *res = new_phi_args.get (new_arg))
    1852              :                     {
    1853        42522 :                       adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
    1854        42522 :                       continue;
    1855              :                     }
    1856              : 
    1857        47753 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1858        47753 :                   gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
    1859        47753 :                   SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg);
    1860        47753 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1861              :                 }
    1862              :             }
    1863        33342 :         }
    1864              : 
    1865        33342 :       if (was_imm_dom || duplicate_outer_loop)
    1866        32992 :         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
    1867              : 
    1868              :       /* And remove the non-necessary forwarder again.  Keep the other
    1869              :          one so we have a proper pre-header for the loop at the exit edge.  */
    1870        33342 :       redirect_edge_pred (single_succ_edge (preheader),
    1871              :                           single_pred (preheader));
    1872        33342 :       delete_basic_block (preheader);
    1873        33342 :       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1874        33342 :                                loop_preheader_edge (scalar_loop)->src);
    1875              : 
    1876              :       /* Finally after wiring the new epilogue we need to update its main exit
    1877              :          to the original function exit we recorded.  Other exits are already
    1878              :          correct.  */
    1879        33342 :       if (multiple_exits_p || uncounted_p)
    1880              :         {
    1881         1406 :           class loop *update_loop = new_loop;
    1882         1406 :           doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
    1883        73580 :           for (unsigned i = 0; i < doms.length (); ++i)
    1884        72174 :             if (flow_bb_inside_loop_p (loop, doms[i]))
    1885         4341 :               doms.unordered_remove (i);
    1886              : 
    1887         7170 :           for (edge e : get_loop_exit_edges (update_loop))
    1888              :             {
    1889         2952 :               edge ex;
    1890         2952 :               edge_iterator ei;
    1891         5870 :               FOR_EACH_EDGE (ex, ei, e->dest->succs)
    1892              :                 {
    1893              :                   /* Find the first non-fallthrough block as fall-throughs can't
    1894              :                      dominate other blocks.  */
    1895         2918 :                   if (single_succ_p (ex->dest))
    1896              :                     {
    1897         1414 :                       doms.safe_push (ex->dest);
    1898         1414 :                       ex = single_succ_edge (ex->dest);
    1899              :                     }
    1900         2918 :                   doms.safe_push (ex->dest);
    1901              :                 }
    1902         2952 :               doms.safe_push (e->dest);
    1903         1406 :             }
    1904              : 
    1905         1406 :           iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    1906         1406 :           if (updated_doms)
    1907         1406 :             updated_doms->safe_splice (doms);
    1908              :         }
    1909              :     }
    1910              :   else /* Add the copy at entry.  */
    1911              :     {
    1912              :       /* Copy the current loop LC PHI nodes between the original loop exit
    1913              :          block and the new loop header.  This allows us to later split the
    1914              :          preheader block and still find the right LC nodes.  */
    1915         1573 :       if (flow_loops)
    1916          438 :         for (auto gsi_from = gsi_start_phis (new_loop->header),
    1917          438 :              gsi_to = gsi_start_phis (loop->header);
    1918         1361 :              !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1919          923 :              gsi_next (&gsi_from), gsi_next (&gsi_to))
    1920              :           {
    1921          923 :             gimple *from_phi = gsi_stmt (gsi_from);
    1922          923 :             gimple *to_phi = gsi_stmt (gsi_to);
    1923          923 :             tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1924              :                                                   loop_latch_edge (new_loop));
    1925          923 :             adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
    1926              :                                         new_arg);
    1927              :           }
    1928              : 
    1929         1573 :       if (scalar_loop != loop)
    1930              :         {
    1931              :           /* Remove the non-necessary forwarder of scalar_loop again.  */
    1932            4 :           redirect_edge_pred (single_succ_edge (preheader),
    1933              :                               single_pred (preheader));
    1934            4 :           delete_basic_block (preheader);
    1935            4 :           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1936            4 :                                    loop_preheader_edge (scalar_loop)->src);
    1937            4 :           preheader = split_edge (loop_preheader_edge (loop));
    1938            4 :           entry_e = single_pred_edge (preheader);
    1939              :         }
    1940              : 
    1941         1573 :       redirect_edge_and_branch_force (entry_e, new_preheader);
    1942         1573 :       flush_pending_stmts (entry_e);
    1943         1573 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
    1944              : 
    1945              : 
    1946              :       /* `vect_set_loop_condition' replaces the condition in the main exit of
    1947              :          loop.  For counted loops, this is the IV counting exit, so in the case
    1948              :          of the prolog loop, we are replacing the old IV counting exit limit of
    1949              :          total loop niters for the new limit of the prolog niters, as desired.
    1950              :          For uncounted loops, we don't have an IV-counting exit to replace, so
    1951              :          we add a dummy exit to be consumed by `vect_set_loop_condition' later
    1952              :          on.  */
    1953         1573 :       if (create_main_e)
    1954              :         {
    1955           31 :           edge to_latch_e = single_pred_edge (new_loop->latch);
    1956           31 :           bool latch_is_false = to_latch_e->flags & EDGE_FALSE_VALUE ? true
    1957              :                                                                      : false;
    1958              : 
    1959              :           /* Add new bb for duplicate exit.  */
    1960           31 :           basic_block bbcond = split_edge (to_latch_e);
    1961           31 :           gimple_stmt_iterator a = gsi_last_bb (bbcond);
    1962              : 
    1963              :           /* Fix flags for the edge leading to the latch.  */
    1964           31 :           to_latch_e = find_edge (bbcond, new_loop->latch);
    1965           31 :           to_latch_e->flags &= ~EDGE_FALLTHRU;
    1966           31 :           to_latch_e->flags |= latch_is_false ? EDGE_FALSE_VALUE
    1967              :                                               : EDGE_TRUE_VALUE;
    1968              : 
    1969              :           /* Build the condition.  */
    1970           31 :           tree cone = build_int_cst (sizetype, 1);
    1971           31 :           tree czero = build_int_cst (sizetype, 0);
    1972           31 :           gcond *cond_copy = gimple_build_cond (NE_EXPR, cone, czero, NULL_TREE,
    1973              :                                                 NULL_TREE);
    1974              : 
    1975           31 :           gsi_insert_after (&a, cond_copy, GSI_NEW_STMT);
    1976              : 
    1977              :           /* Add edge for exiting the loop via new condition.  */
    1978           38 :           edge dup_exit = make_edge (bbcond, new_exit->dest, latch_is_false
    1979              :                                 ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE);
    1980              : 
    1981           31 :           profile_probability probability = profile_probability::even ();
    1982           31 :           to_latch_e->probability = dup_exit->probability = probability;
    1983              : 
    1984           31 :           set_immediate_dominator (CDI_DOMINATORS, dup_exit->src,
    1985              :                                    new_exit->src);
    1986           31 :           new_exit = dup_exit;
    1987           31 :           *new_e = new_exit;
    1988              :         }
    1989              : 
    1990         1573 :       redirect_edge_and_branch_force (new_exit, preheader);
    1991         1573 :       flush_pending_stmts (new_exit);
    1992         1573 :       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
    1993              : 
    1994              :       /* And remove the non-necessary forwarder again.  Keep the other
    1995              :          one so we have a proper pre-header for the loop at the exit edge.  */
    1996         1573 :       redirect_edge_pred (single_succ_edge (new_preheader),
    1997              :                           single_pred (new_preheader));
    1998         1573 :       delete_basic_block (new_preheader);
    1999         1573 :       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
    2000         1573 :                                loop_preheader_edge (new_loop)->src);
    2001              : 
    2002              :       /* Update dominators for multiple exits.  */
    2003         1573 :       if (multiple_exits_p || create_main_e)
    2004              :         {
    2005         1153 :           for (edge alt_e : loop_exits)
    2006              :             {
    2007          457 :               if ((alt_e == loop_exit) && !create_main_e)
    2008          201 :                 continue;
    2009          256 :               basic_block old_dom
    2010          256 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    2011          256 :               if (flow_bb_inside_loop_p (loop, old_dom))
    2012              :                 {
    2013          101 :                   auto_vec<basic_block, 8> queue;
    2014          101 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    2015          325 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    2016          224 :                     if (!flow_bb_inside_loop_p (loop, son))
    2017          123 :                       queue.safe_push (son);
    2018          426 :                   for (auto son : queue)
    2019          123 :                     set_immediate_dominator (CDI_DOMINATORS,
    2020              :                                              son, get_bb_copy (old_dom));
    2021          101 :                 }
    2022              :             }
    2023              :         }
    2024              : 
    2025              :       /* When loop_exit != scalar_exit due to if-conversion loop versioning,
    2026              :          the `scalar_exit' now has two incoming edges, one from the if-converted
    2027              :          and one from the peeled prolog loop.  It is therefore dominated by a
    2028              :          common block between these.  Update its dominator accordingly.  */
    2029          232 :       if (create_main_e && loop_exit != scalar_exit)
    2030            0 :         set_immediate_dominator (CDI_DOMINATORS, scalar_exit->dest,
    2031              :                                  recompute_dominator (CDI_DOMINATORS,
    2032              :                                                       scalar_exit->dest));
    2033              :     }
    2034              : 
    2035        34915 :   free (new_bbs);
    2036        34915 :   free (bbs);
    2037              : 
    2038        34915 :   checking_verify_dominators (CDI_DOMINATORS);
    2039              : 
    2040        34915 :   return new_loop;
    2041        34915 : }
    2042              : 
    2043              : 
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  /* The existing fallthru successor of GUARD_BB becomes the "enter the
     loop" (condition false) edge once the guard condition is in place.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  /* COND may be an arbitrary tree expression; gimplify it into a form
     valid for a GIMPLE_COND, emitting any helper statements.  */
  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
				 is_gimple_condexpr_for_cond, NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  /* Re-fetch the iterator: the insertions above may have changed which
     statement is last in GUARD_BB.  */
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  /* The two outgoing probabilities must sum to one.  */
  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
    2090              : 
    2091              : 
    2092              : /* This function verifies that the following restrictions apply to LOOP:
    2093              :    (1) it consists of exactly 2 basic blocks - header, and an empty latch
    2094              :        for innermost loop and 5 basic blocks for outer-loop.
    2095              :    (2) it is single entry, single exit
    2096              :    (3) its exit condition is the last stmt in the header
    2097              :    (4) E is the entry/exit edge of LOOP.
    2098              :  */
    2099              : 
    2100              : bool
    2101       421902 : slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
    2102              :                              const_edge e)
    2103              : {
    2104       421902 :   edge entry_e = loop_preheader_edge (loop);
    2105       421902 :   gcond *orig_cond = get_loop_exit_condition (exit_e);
    2106       421902 :   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
    2107              : 
    2108              :   /* All loops have an outer scope; the only case loop->outer is NULL is for
    2109              :      the function itself.  */
    2110       421902 :   if (!loop_outer (loop)
    2111       421902 :       || !empty_block_p (loop->latch)
    2112              :       || !exit_e
    2113              :       /* Verify that new loop exit condition can be trivially modified.  */
    2114       421902 :       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
    2115       843804 :       || (e != exit_e && e != entry_e))
    2116            0 :     return false;
    2117              : 
    2118       421902 :   basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
    2119       421902 :   get_loop_body_with_size (loop, bbs, loop->num_nodes);
    2120       421902 :   bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
    2121       421902 :   free (bbs);
    2122       421902 :   return ret;
    2123              : }
    2124              : 
    2125              : /* Function find_loop_location.
    2126              : 
    2127              :    Extract the location of the loop in the source code.
    2128              :    If the loop is not well formed for vectorization, an estimated
    2129              :    location is calculated.
    2130              :    Return the loop location if succeed and NULL if not.  */
    2131              : 
    2132              : dump_user_location_t
    2133      3569629 : find_loop_location (class loop *loop)
    2134              : {
    2135      3569629 :   gimple *stmt = NULL;
    2136      3569629 :   basic_block bb;
    2137      3569629 :   gimple_stmt_iterator si;
    2138              : 
    2139      3569629 :   if (!loop)
    2140            0 :     return dump_user_location_t ();
    2141              : 
    2142              :   /* For the root of the loop tree return the function location.  */
    2143      3569629 :   if (!loop_outer (loop))
    2144            0 :     return dump_user_location_t::from_function_decl (cfun->decl);
    2145              : 
    2146      3569629 :   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
    2147              :     {
    2148              :       /* We only care about the loop location, so use any exit with location
    2149              :          information.  */
    2150     11197484 :       for (edge e : get_loop_exit_edges (loop))
    2151              :         {
    2152      3665246 :           stmt = get_loop_exit_condition (e);
    2153              : 
    2154      3665246 :           if (stmt
    2155      3665246 :               && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2156      3117485 :             return stmt;
    2157      3569629 :         }
    2158              :     }
    2159              : 
    2160              :   /* If we got here the loop is probably not "well formed",
    2161              :      try to estimate the loop location */
    2162              : 
    2163       452144 :   if (!loop->header)
    2164            0 :     return dump_user_location_t ();
    2165              : 
    2166       452144 :   bb = loop->header;
    2167              : 
    2168      1597532 :   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    2169              :     {
    2170      1026206 :       stmt = gsi_stmt (si);
    2171      1026206 :       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2172       332962 :         return stmt;
    2173              :     }
    2174              : 
    2175       119182 :   return dump_user_location_t ();
    2176              : }
    2177              : 
    2178              : /* Return true if the phi described by STMT_INFO defines an IV of the
    2179              :    loop to be vectorized.  */
    2180              : 
    2181              : static bool
    2182      1118594 : iv_phi_p (stmt_vec_info stmt_info)
    2183              : {
    2184      1118594 :   gphi *phi = as_a <gphi *> (stmt_info->stmt);
    2185      2237188 :   if (virtual_operand_p (PHI_RESULT (phi)))
    2186              :     return false;
    2187              : 
    2188       891559 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
    2189       891559 :       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    2190       116309 :     return false;
    2191              : 
    2192              :   return true;
    2193              : }
    2194              : 
    2195              : /* Return true if vectorizer can peel for nonlinear iv.  */
    2196              : static bool
    2197         7936 : vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
    2198              :                               stmt_vec_info stmt_info)
    2199              : {
    2200         7936 :   enum vect_induction_op_type induction_type
    2201              :     = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
    2202         7936 :   tree niters_skip;
    2203              :   /* Init_expr will be update by vect_update_ivs_after_vectorizer,
    2204              :      if niters or vf is unkown:
    2205              :      For shift, when shift mount >= precision, there would be UD.
    2206              :      For mult, don't known how to generate
    2207              :      init_expr * pow (step, niters) for variable niters.
    2208              :      For neg unknown niters are ok, since niters of vectorized main loop
    2209              :      will always be multiple of 2.
    2210              :      See also PR113163,  PR114196 and PR114485.  */
    2211         7936 :   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    2212         7936 :       || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    2213         7936 :       || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2214         4738 :           && induction_type != vect_step_op_neg))
    2215              :     {
    2216         4576 :       if (dump_enabled_p ())
    2217           12 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2218              :                          "Peeling for epilogue is not supported"
    2219              :                          " for this nonlinear induction"
    2220              :                          " when iteration count is unknown or"
    2221              :                          " when using partial vectorization.\n");
    2222         4576 :       return false;
    2223              :     }
    2224              : 
    2225         3360 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    2226          266 :       && induction_type == vect_step_op_mul)
    2227              :     {
    2228           24 :       if (dump_enabled_p ())
    2229            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2230              :                          "Peeling for is not supported for nonlinear mult"
    2231              :                          " induction using partial vectorization.\n");
    2232           24 :       return false;
    2233              :     }
    2234              : 
    2235              :   /* Avoid compile time hog on vect_peel_nonlinear_iv_init.  */
    2236         3094 :   if (induction_type == vect_step_op_mul)
    2237              :     {
    2238          401 :       tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
    2239          401 :       tree type = TREE_TYPE (step_expr);
    2240              : 
    2241          796 :       if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
    2242          401 :           && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
    2243              :         {
    2244            6 :           if (dump_enabled_p ())
    2245            6 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2246              :                              "Avoid compile time hog on"
    2247              :                              " vect_peel_nonlinear_iv_init"
    2248              :                              " for nonlinear induction vec_step_op_mul"
    2249              :                              " when iteration count is too big.\n");
    2250            6 :           return false;
    2251              :         }
    2252              :     }
    2253              : 
    2254              :   /* Also doens't support peel for neg when niter is variable.
    2255              :      ??? generate something like niter_expr & 1 ? init_expr : -init_expr?  */
    2256         3330 :   niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
    2257         3330 :   if ((niters_skip != NULL_TREE
    2258            0 :        && (TREE_CODE (niters_skip) != INTEGER_CST
    2259            0 :            || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
    2260         3330 :       || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
    2261         3330 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
    2262              :     {
    2263            4 :       if (dump_enabled_p ())
    2264            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2265              :                          "Peeling for alignement is not supported"
    2266              :                          " for nonlinear induction when niters_skip"
    2267              :                          " is not constant.\n");
    2268            4 :       return false;
    2269              :     }
    2270              : 
    2271              :   return true;
    2272              : }
    2273              : 
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  Return false as soon as
     any PHI is found whose initial value we cannot recompute after the
     vectorized loop; return true only if all IV PHIs are advanceable.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;
      enum vect_induction_op_type induction_type;

      gphi *phi = gsi.phi ();
      stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
			 phi_info->stmt);

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");
	  continue;
	}

      /* Nonlinear IVs (shift, mult, neg) have extra restrictions that
	 vect_can_peel_nonlinear_iv_p checks; linear (step-add) IVs fall
	 through to the evolution checks below.  */
      induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
      if (induction_type != vect_step_op_add)
	{
	  if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
	    return false;

	  continue;
	}

      /* Analyze the evolution function.  */

      evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n");
	  return false;
	}
    }

  return true;
}
    2362              : 
    2363              : 
/*   Function vect_update_ivs_after_vectorizer.

     "Advance" the induction variables of LOOP to the value they should take
     after the execution of LOOP.  This is currently necessary because the
     vectorizer does not handle induction variables that are used after the
     loop.  Such a situation occurs when the last iterations of LOOP are
     peeled, because:
     1. We introduced new uses after LOOP for IVs that were not originally used
        after LOOP: the IVs of LOOP are now used by an epilog loop.
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
        times, whereas the loop IVs should be bumped N times.

     Input:
     - LOOP - a loop that is going to be vectorized. The last few iterations
              of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
                vectorized). i.e, the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                  coming out from LOOP on which there are uses of the LOOP ivs
                  (this is the path from LOOP->exit to epilog_loop->preheader).

                  The new definitions of the ivs are placed in LOOP->exit.
                  The phi args associated with the edge UPDATE_E in the bb
                  UPDATE_E->dest are updated accordingly.

     - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
                      the main latch exit.

     Assumption 1: Like the rest of the vectorizer, this function assumes
     a single loop exit that has a single predecessor.

     Assumption 2: The phi nodes in the LOOP header and in update_bb are
     organized in the same order.

     Assumption 3: The access function of the ivs is simple enough (see
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
     coming out of LOOP on which the ivs of LOOP are used (this is the path
     that leads to the epilog loop; other paths skip the epilog loop).  This
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
     needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e,
				  bool early_exit_p)
{
  gphi_iterator gsi, gsi1;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = update_e->src;
  /* Check to see if this is an empty loop pre-header block.  If it exists
     we need to use the edge from that block -> loop header for updates but
     must use the original exit_bb to add any new adjustment because there
     can be a skip_epilog edge bypassing the epilog and so the loop pre-header
     too.  */
  if (empty_block_p (update_bb) && single_succ_p (update_bb))
    {
      update_e = single_succ_edge (update_bb);
      update_bb = update_e->dest;
    }
  /* Note LAST_GSI stays anchored in the ORIGINAL exit block (see above):
     all newly built IV-advancing statements are inserted there.  */
  gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);

  /* Walk the header PHIs and the UPDATE_BB PHIs in lockstep (Assumption 2:
     they are organized in the same order).  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_update_ivs_after_vectorizer: phi: %G",
			 (gimple *) phi);

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");
	  continue;
	}

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
      /* Unshare so folding below cannot corrupt the stored evolution.  */
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      gimple_seq stmts = NULL;
      enum vect_induction_op_type induction_type
	= STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);

      if (induction_type == vect_step_op_add)
	{
	  /* Linear IV: final value is init + niters * step, using
	     pointer-plus for pointer-typed IVs.  */
	  tree stype = TREE_TYPE (step_expr);
	  off = fold_build2 (MULT_EXPR, stype,
			       fold_convert (stype, niters), step_expr);

	  if (POINTER_TYPE_P (type))
	    ni = fold_build_pointer_plus (init_expr, off);
	  else
	    ni = fold_convert (type,
			       fold_build2 (PLUS_EXPR, stype,
					    fold_convert (stype, init_expr),
					    off));
	}
      /* Don't bother call vect_peel_nonlinear_iv_init.  */
      else if (induction_type == vect_step_op_neg)
	ni = init_expr;
      else
	ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
					  niters, step_expr,
					  induction_type, early_exit_p);

      var = create_tmp_var (type, "tmp");

      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);

      /* Exit_bb shouldn't be empty, but we also can't insert after a ctrl
         statements.  */
      if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
	{
	  gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
	  gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
	}
      else
	{
	  gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
	  gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
	}

      /* Update the PHI argument on the requested edge.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
    2512              : 
    2513              : /* Return a gimple value containing the misalignment (measured in vector
    2514              :    elements) for the loop described by LOOP_VINFO, i.e. how many elements
    2515              :    it is away from a perfectly aligned address.  Add any new statements
    2516              :    to SEQ.  */
    2517              : 
static tree
get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
  unsigned HOST_WIDE_INT target_align_c;
  tree target_align_minus_1;

  /* For a negative-step access, measure the misalignment at the base
     address offset back by (nunits - 1) elements, i.e. at the lowest
     address the vector access touches.  */
  bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
                                        size_zero_node) < 0;
  tree offset = (negative
                 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
                             * TREE_INT_CST_LOW
                                 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
                 : size_zero_node);
  tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
                                                          stmt_info, seq,
                                                          offset);
  tree type = unsigned_type_for (TREE_TYPE (start_addr));
  /* Build target_align - 1, either folded to a constant or as an
     explicit subtraction when the target alignment is not a
     compile-time constant.  */
  if (target_align.is_constant (&target_align_c))
    target_align_minus_1 = build_int_cst (type, target_align_c - 1);
  else
    {
      tree vla = build_int_cst (type, target_align);
      target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
                                          build_int_cst (type, 1));
    }

  /* The element size must be a power of two here (exact_log2), so the
     division by it below can be emitted as a right shift.  */
  HOST_WIDE_INT elem_size
    = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
  tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));

  /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
  tree int_start_addr = fold_convert (type, start_addr);
  tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
                                        target_align_minus_1);

  /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
  tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
                                        elem_size_log);

  return misalign_in_elems;
}
    2564              : 
    2565              : /* Function vect_gen_prolog_loop_niters
    2566              : 
    2567              :    Generate the number of iterations which should be peeled as prolog for the
    2568              :    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
    2569              :    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
    2570              :    As a result, after the execution of this loop, the data reference DR will
    2571              :    refer to an aligned location.  The following computation is generated:
    2572              : 
    2573              :    If the misalignment of DR is known at compile time:
    2574              :      addr_mis = int mis = DR_MISALIGNMENT (dr);
    2575              :    Else, compute address misalignment in bytes:
    2576              :      addr_mis = addr & (target_align - 1)
    2577              : 
    2578              :    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
    2579              : 
    2580              :    (elem_size = element type size; an element is the scalar element whose type
    2581              :    is the inner type of the vectype)
    2582              : 
    2583              :    The computations will be emitted at the end of BB.  We also compute and
    2584              :    store upper bound (included) of the result in BOUND.
    2585              : 
    2586              :    When the step of the data-ref in the loop is not 1 (as in interleaved data
    2587              :    and SLP), the number of iterations of the prolog must be divided by the step
    2588              :    (which is equal to the size of interleaved group).
    2589              : 
    2590              :    The above formulas assume that VF == number of elements in the vector. This
    2591              :    may not hold when there are multiple-types in the loop.
    2592              :    In this case, for some data-references in the loop the VF does not represent
    2593              :    the number of elements that fit in the vector.  Therefore, instead of VF we
    2594              :    use TYPE_VECTOR_SUBPARTS.  */
    2595              : 
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, poly_int64 *bound)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type
    = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) ? sizetype
                                                 : TREE_TYPE (LOOP_VINFO_NITERS
                                                              (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);

  /* If the analysis phase already decided on a constant peeling amount,
     just emit that constant; BOUND is then exact.  */
  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      /* Otherwise compute the peeling amount at runtime from the
         misalignment in elements.  */
      tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
      tree type = TREE_TYPE (misalign_in_elems);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      /* We only do prolog peeling if the target alignment is known at compile
         time.  */
      poly_uint64 align_in_elems =
        exact_div (target_align, elem_size);
      tree align_in_elems_minus_1 =
        build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).
         For a negative step the roles of the operands of the subtraction
         are swapped; the BIT_AND below reduces the result modulo
         align_in_elems either way.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
                                            size_zero_node) < 0;
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      /* The masked result is at most align_in_elems - 1, but the stored
         bound is the alignment itself (an inclusive over-estimate).  */
      *bound = align_in_elems;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "niters for prolog loop: %T\n", iters);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  /* Emit the address computation from get_misalign_in_elems followed by
     the gimplified niters expression at the end of BB (before a final
     control statement if there is one).  */
  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
    2672              : 
    2673              : 
    2674              : /* Function vect_update_init_of_dr
    2675              : 
    2676              :    If CODE is PLUS, the vector loop starts NITERS iterations after the
    2677              :    scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
    2678              :    iterations before the scalar one (using masking to skip inactive
    2679              :    elements).  This function updates the information recorded in DR to
    2680              :    account for the difference.  Specifically, it updates the OFFSET
    2681              :    field of DR_INFO.  */
    2682              : 
    2683              : static void
    2684        25874 : vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
    2685              : {
    2686        25874 :   struct data_reference *dr = dr_info->dr;
    2687        25874 :   tree offset = dr_info->offset;
    2688        25874 :   if (!offset)
    2689        25874 :     offset = build_zero_cst (sizetype);
    2690              : 
    2691        25874 :   niters = fold_build2 (MULT_EXPR, sizetype,
    2692              :                         fold_convert (sizetype, niters),
    2693              :                         fold_convert (sizetype, DR_STEP (dr)));
    2694        25874 :   offset = fold_build2 (code, sizetype,
    2695              :                         fold_convert (sizetype, offset), niters);
    2696        25874 :   dr_info->offset = offset;
    2697        25874 : }
    2698              : 
    2699              : 
    2700              : /* Function vect_update_inits_of_drs
    2701              : 
    2702              :    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    2703              :    CODE and NITERS are as for vect_update_inits_of_dr.  */
    2704              : 
    2705              : void
    2706         7495 : vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
    2707              :                           tree_code code)
    2708              : {
    2709         7495 :   unsigned int i;
    2710         7495 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2711         7495 :   struct data_reference *dr;
    2712              : 
    2713         7495 :   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
    2714              : 
    2715              :   /* Adjust niters to sizetype.  We used to insert the stmts on loop preheader
    2716              :      here, but since we might use these niters to update the epilogues niters
    2717              :      and data references we can't insert them here as this definition might not
    2718              :      always dominate its uses.  */
    2719         7495 :   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    2720         4586 :     niters = fold_convert (sizetype, niters);
    2721              : 
    2722        41172 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2723              :     {
    2724        26300 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2725        26300 :       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
    2726        25874 :           && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
    2727        25874 :         vect_update_init_of_dr (dr_info, niters, code);
    2728              :     }
    2729         7495 : }
    2730              : 
    2731              : /* For the information recorded in LOOP_VINFO prepare the loop for peeling
    2732              :    by masking.  This involves calculating the number of iterations to
    2733              :    be peeled and then aligning all memory references appropriately.  */
    2734              : 
void
vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
{
  tree misalign_in_elems;
  tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));

  gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));

  /* From the information recorded in LOOP_VINFO get the number of iterations
     that need to be skipped via masking.  */
  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Known peeling amount: the iterations to mask off are
         VF - npeel, emitted as a constant.  */
      poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                             - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
      misalign_in_elems = build_int_cst (type, misalign);
    }
  else
    {
      /* Unknown peeling amount: compute the misalignment at runtime and
         gimplify it, emitting all generated statements on the loop
         preheader edge (which must not require a new basic block).  */
      gimple_seq seq1 = NULL, seq2 = NULL;
      misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
      misalign_in_elems = fold_convert (type, misalign_in_elems);
      misalign_in_elems = force_gimple_operand (misalign_in_elems,
                                                &seq2, true, NULL_TREE);
      gimple_seq_add_seq (&seq1, seq2);
      if (seq1)
        {
          edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
          gcc_assert (!new_bb);
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "misalignment for fully-masked loop: %T\n",
                     misalign_in_elems);

  /* Record the skip count for mask generation and shift all data
     references back by that many iterations.  */
  LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;

  vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
}
    2776              : 
    2777              : /* This function builds ni_name = number of iterations.  Statements
    2778              :    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
    2779              :    it to TRUE if new ssa_var is generated.  */
    2780              : 
    2781              : tree
    2782        61856 : vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
    2783              : {
    2784        61856 :   if (LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
    2785              :     return NULL_TREE;
    2786        61782 :   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
    2787        61782 :   if (TREE_CODE (ni) == INTEGER_CST)
    2788              :     return ni;
    2789              :   else
    2790              :     {
    2791        25779 :       tree ni_name, var;
    2792        25779 :       gimple_seq stmts = NULL;
    2793        25779 :       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2794              : 
    2795        25779 :       var = create_tmp_var (TREE_TYPE (ni), "niters");
    2796        25779 :       ni_name = force_gimple_operand (ni, &stmts, false, var);
    2797        25779 :       if (stmts)
    2798              :         {
    2799        24723 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    2800        24723 :           if (new_var_p != NULL)
    2801          222 :             *new_var_p = true;
    2802              :         }
    2803              : 
    2804        25779 :       return ni_name;
    2805              :     }
    2806              : }
    2807              : 
    2808              : /* Calculate the number of iterations above which vectorized loop will be
    2809              :    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
    2810              :    of prolog loop.  If it's integer const, the integer number is also passed
    2811              :    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (inclusive) of the
    2812              :    number of iterations of the prolog loop.  BOUND_EPILOG is the corresponding
    2813              :    value for the epilog loop.  If CHECK_PROFITABILITY is true, TH is the
    2814              :    threshold below which the scalar (rather than vectorized) loop will be
    2815              :    executed.  This function stores the upper bound (inclusive) of the result
    2816              :    in BOUND_SCALAR.  */
    2817              : 
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             poly_int64 bound_prolog, poly_int64 bound_epilog,
                             int th, poly_uint64 *bound_scalar,
                             bool check_profitability)
{
  /* Total scalar iterations spent outside the vector loop:
     prolog niters plus the (constant) epilog bound.  */
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, bound_epilog));

  *bound_scalar = bound_prolog + bound_epilog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling an unknown number of times.  Note that both BOUND_PROLOG
         and BOUND_EPILOG are inclusive upper bounds.  */
      if (known_ge (th, bound_prolog + bound_epilog))
        {
          /* The threshold always dominates the peeled iterations, so it
             is the scalar bound on its own.  */
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison.  */
      else if (maybe_gt (th, bound_epilog))
        {
          /* Neither value is known to dominate: emit MAX (th, niters)
             and widen the recorded bound accordingly.  */
          *bound_scalar = upper_bound (*bound_scalar, th);
          return fold_build2 (MAX_EXPR, type,
                              build_int_cst (type, th), niters);
        }
      /* Otherwise TH never exceeds the peeled iterations; NITERS as
         computed above is already the answer.  */
    }
  return niters;
}
    2857              : 
    2858              : /* NITERS is the number of times that the original scalar loop executes
    2859              :    after peeling.  Work out the maximum number of iterations N that can
    2860              :    be handled by the vectorized form of the loop and then either:
    2861              : 
    2862              :    a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
    2863              : 
    2864              :         niters_vector = N
    2865              : 
    2866              :    b) set *STEP_VECTOR_PTR to one and generate:
    2867              : 
    2868              :         niters_vector = N / vf
    2869              : 
    2870              :    In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
    2871              :    any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
    2872              :    is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).
    2873              : 
    2874              :    Case (a) is used for LOOP_VINFO_USING_PARTIAL_VECTORS_P or if VF is
    2875              :    variable.  As stated above, NITERS_VECTOR then equals the number
    2876              :    of scalar iterations and vect_set_loop_condition will handle the
    2877              :    step.  As opposed to (b) we don't know anything about NITER_VECTOR's
    2878              :    range here.
    2879              : */
    2880              : 
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, tree *step_vector_ptr,
                             bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, step_vector;
  tree type = niters ? TREE_TYPE (niters) : sizetype;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      /* Gimplify NITERS - 1 on the preheader edge if it didn't fold to a
         simple value.  */
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* To silence some unexpected warnings, simply initialize to 0. */
  unsigned HOST_WIDE_INT const_vf = 0;
  /* Case (b) of the function comment: constant VF and no partial
     vectors, so divide the scalar count by VF and use a step of 1.  */
  if (vf.is_constant (&const_vf)
      && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    {
      /* Create: niters / vf, which is equivalent to niters >> log2(vf) when
                 vf is a power of two, and when not we approximate using a
                 truncating division.  */
      /* If it's known that niters == number of latch executions + 1 doesn't
         overflow, we can generate niters / vf; otherwise we generate
         (niters - vf) / vf + 1 by using the fact that we know ratio
         will be at least one.  */
      tree var_vf = build_int_cst (type, const_vf);
      if (niters_no_overflow)
        niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
                                     var_vf);
      else
        niters_vector
          = fold_build2 (PLUS_EXPR, type,
                         fold_build2 (TRUNC_DIV_EXPR, type,
                                      fold_build2 (MINUS_EXPR, type,
                                                   ni_minus_gap,
                                                   var_vf),
                                      var_vf),
                         build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      /* Case (a): keep the scalar iteration count and let the step be VF;
         vect_set_loop_condition handles the stepping.  */
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.
         Note the number of latch iteration value can be TYPE_MAX_VALUE so
         we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
      if (stmts != NULL
          && integer_onep (step_vector))
        {
          if (niters_no_overflow)
            {
              /* Range [1, TYPE_MAX / vf].  */
              int_range<1> vr (type,
                               wi::one (TYPE_PRECISION (type)),
                               wi::div_trunc (wi::max_value
                                              (TYPE_PRECISION (type),
                                               TYPE_SIGN (type)),
                                              const_vf,
                                              TYPE_SIGN (type)));
              set_range_info (niters_vector, vr);
            }
          /* For VF == 1 the vector IV might also overflow so we cannot
             assert a minimum value of 1.  */
          else if (const_vf > 1)
            {
              /* Range [1, (TYPE_MAX + 1) / vf] computed without overflow
                 via a shift; vf is a power of two here (exact_log2).  */
              int_range<1> vr (type,
                               wi::one (TYPE_PRECISION (type)),
                               wi::rshift (wi::max_value (TYPE_PRECISION (type),
                                                          TYPE_SIGN (type))
                                           - (const_vf - 1),
                                           exact_log2 (const_vf), TYPE_SIGN (type))
                               + 1);
              set_range_info (niters_vector, vr);
            }
        }
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;

  return;
}
    2988              : 
    2989              : /* Given NITERS_VECTOR which is the number of iterations for vectorized
    2990              :    loop specified by LOOP_VINFO after vectorization, compute the number
    2991              :    of iterations before vectorization (niters_vector * vf) and store it
    2992              :    to NITERS_VECTOR_MULT_VF_PTR.  */
    2993              : 
    2994              : static void
    2995        33299 : vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
    2996              :                                      tree niters_vector,
    2997              :                                      tree *niters_vector_mult_vf_ptr)
    2998              : {
    2999              :   /* We should be using a step_vector of VF if VF is variable.  */
    3000        33299 :   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
    3001        33299 :   tree type = TREE_TYPE (niters_vector);
    3002        33299 :   tree tree_vf = build_int_cst (type, vf);
    3003        33299 :   basic_block exit_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3004              : 
    3005        33299 :   gcc_assert (niters_vector_mult_vf_ptr != NULL);
    3006        33299 :   tree niters_vector_mult_vf = fold_build2 (MULT_EXPR, type,
    3007              :                                             niters_vector, tree_vf);
    3008              : 
    3009              :   /* If we've peeled a vector iteration then subtract one full vector
    3010              :      iteration.  */
    3011        33299 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3012           19 :     niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
    3013              :                                          niters_vector_mult_vf, tree_vf);
    3014              : 
    3015        33299 :   if (!is_gimple_val (niters_vector_mult_vf))
    3016              :     {
    3017        24824 :       tree var = create_tmp_var (type, "niters_vector_mult_vf");
    3018        24824 :       gimple_seq stmts = NULL;
    3019        24824 :       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
    3020              :                                                     &stmts, true, var);
    3021        24824 :       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
    3022        24824 :       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    3023              :     }
    3024        33299 :   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
    3025        33299 : }
    3026              : 
    3027              : /* Function slpeel_add_loop_guard adds guard skipping from the beginning
    3028              :    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
    3029              :    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
    3030              :    appear like below:
    3031              : 
    3032              :        guard_bb:
    3033              :          if (cond)
    3034              :            goto merge_bb;
    3035              :          else
    3036              :            goto skip_loop;
    3037              : 
    3038              :      skip_loop:
    3039              :        header_a:
    3040              :          i_1 = PHI<i_0, i_2>;
    3041              :          ...
    3042              :          i_2 = i_1 + 1;
    3043              :          if (cond_a)
    3044              :            goto latch_a;
    3045              :          else
    3046              :            goto exit_a;
    3047              :        latch_a:
    3048              :          goto header_a;
    3049              : 
    3050              :        exit_a:
    3051              :          i_5 = PHI<i_2>;
    3052              : 
    3053              :        merge_bb:
    3054              :          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
    3055              : 
    3056              :      update_loop:
    3057              :        header_b:
    3058              :          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
    3059              :          ...
    3060              :          i_4 = i_3 + 1;
    3061              :          if (cond_b)
    3062              :            goto latch_b;
    3063              :          else
    3064              :            goto exit_bb;
    3065              :        latch_b:
    3066              :          goto header_b;
    3067              : 
    3068              :        exit_bb:
    3069              : 
    3070              :    This function creates PHI nodes at merge_bb and replaces the use of i_5
    3071              :    in the update_loop's PHI node with the result of new PHI result.  */
    3072              : 
    3073              : static void
    3074        25237 : slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
    3075              :                                     class loop *update_loop,
    3076              :                                     edge guard_edge, edge merge_edge)
    3077              : {
    3078        25237 :   location_t merge_loc, guard_loc;
    3079        25237 :   edge orig_e = loop_preheader_edge (skip_loop);
    3080        25237 :   edge update_e = loop_preheader_edge (update_loop);
    3081        25237 :   gphi_iterator gsi_orig, gsi_update;
    3082              : 
    3083        25237 :   for ((gsi_orig = gsi_start_phis (skip_loop->header),
    3084        25237 :         gsi_update = gsi_start_phis (update_loop->header));
    3085        92455 :        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
    3086        67218 :        gsi_next (&gsi_orig), gsi_next (&gsi_update))
    3087              :     {
    3088        67218 :       gphi *orig_phi = gsi_orig.phi ();
    3089        67218 :       gphi *update_phi = gsi_update.phi ();
    3090              : 
    3091              :       /* Generate new phi node at merge bb of the guard.  */
    3092        67218 :       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    3093        67218 :       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
    3094              : 
    3095              :       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
    3096              :          args in NEW_PHI for these edges.  */
    3097        67218 :       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
    3098        67218 :       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
    3099        67218 :       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
    3100        67218 :       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
    3101        67218 :       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
    3102        67218 :       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
    3103              : 
    3104              :       /* Update phi in UPDATE_PHI.  */
    3105        67218 :       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    3106              :     }
    3107        25237 : }
    3108              : 
    3109              : /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
    3110              :    Return a value that equals:
    3111              : 
    3112              :    - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
    3113              :    - SKIP_VALUE when the main loop is skipped.  */
    3114              : 
    3115              : tree
    3116         3961 : vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
    3117              :                            tree skip_value)
    3118              : {
    3119         3961 :   gcc_assert (loop_vinfo->main_loop_edge);
    3120              : 
    3121         3961 :   tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
    3122         3961 :   basic_block bb = loop_vinfo->main_loop_edge->dest;
    3123         3961 :   gphi *new_phi = create_phi_node (phi_result, bb);
    3124         3961 :   add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
    3125              :                UNKNOWN_LOCATION);
    3126         3961 :   add_phi_arg (new_phi, skip_value,
    3127              :                loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
    3128         3961 :   return phi_result;
    3129              : }
    3130              : 
    3131              : /* Function vect_do_peeling.
    3132              : 
    3133              :    Input:
    3134              :    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
    3135              : 
    3136              :        preheader:
    3137              :      LOOP:
    3138              :        header_bb:
    3139              :          loop_body
    3140              :          if (exit_loop_cond) goto exit_bb
    3141              :          else                goto header_bb
    3142              :        exit_bb:
    3143              : 
    3144              :    - NITERS: The number of iterations of the loop.
    3145              :    - NITERSM1: The number of iterations of the loop's latch.
    3146              :    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
    3147              :    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
    3148              :                               CHECK_PROFITABILITY is true.
    3149              :    Output:
    3150              :    - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
    3151              :      iterate after vectorization; see vect_set_loop_condition for details.
    3152              :    - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
    3153              :      should be set to the number of scalar iterations handled by the
    3154              :      vector loop.  The SSA name is only used on exit from the loop.
    3155              : 
    3156              :    This function peels prolog and epilog from the loop, adds guards skipping
    3157              :    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
    3158              :    would look like:
    3159              : 
    3160              :        guard_bb_1:
    3161              :          if (prefer_scalar_loop) goto merge_bb_1
    3162              :          else                    goto guard_bb_2
    3163              : 
    3164              :        guard_bb_2:
    3165              :          if (skip_prolog) goto merge_bb_2
    3166              :          else             goto prolog_preheader
    3167              : 
    3168              :        prolog_preheader:
    3169              :      PROLOG:
    3170              :        prolog_header_bb:
    3171              :          prolog_body
    3172              :          if (exit_prolog_cond) goto prolog_exit_bb
    3173              :          else                  goto prolog_header_bb
    3174              :        prolog_exit_bb:
    3175              : 
    3176              :        merge_bb_2:
    3177              : 
    3178              :        vector_preheader:
    3179              :      VECTOR LOOP:
    3180              :        vector_header_bb:
    3181              :          vector_body
    3182              :          if (exit_vector_cond) goto vector_exit_bb
    3183              :          else                  goto vector_header_bb
    3184              :        vector_exit_bb:
    3185              : 
    3186              :        guard_bb_3:
    3187              :          if (skip_epilog) goto merge_bb_3
    3188              :          else             goto epilog_preheader
    3189              : 
    3190              :        merge_bb_1:
    3191              : 
    3192              :        epilog_preheader:
    3193              :      EPILOG:
    3194              :        epilog_header_bb:
    3195              :          epilog_body
    3196              :          if (exit_epilog_cond) goto merge_bb_3
    3197              :          else                  goto epilog_header_bb
    3198              : 
    3199              :        merge_bb_3:
    3200              : 
    3201              :    Note this function peels prolog and epilog only if it's necessary,
    3202              :    as well as guards.
    3203              :    This function returns the epilogue loop if a decision was made to vectorize
    3204              :    it, otherwise NULL.
    3205              : 
    3206              :    The analysis resulting in this epilogue loop's loop_vec_info was performed
    3207              :    in the same vect_analyze_loop call as the main loop's.  At that time
    3208              :    vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
    3209              :    vectorization factors than the main loop.  This list is chained in the
    3210              :    loop's loop_vec_info in the 'epilogue_vinfo' member.  When we decide to
    3211              :    vectorize the epilogue loop for a lower vectorization factor, the
    3212              :    loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
    3213              :    This is later used to vectorize the epilogue.
    3214              :    The reason the loop_vec_info needs updating is that it was
    3215              :    constructed based on the original main loop, and the epilogue loop is a
    3216              :    copy of this loop, so all links pointing to statements in the original loop
    3217              :    need updating.  Furthermore, these loop_vec_infos share the
    3218              :    data_reference's records, which will also need to be updated.
    3219              : 
    3220              :    TODO: Guard for prefer_scalar_loop should be emitted along with
    3221              :    versioning conditions if loop versioning is needed.  */
    3222              : 
    3223              : 
    3224              : class loop *
    3225        61418 : vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
    3226              :                  tree *niters_vector, tree *step_vector,
    3227              :                  tree *niters_vector_mult_vf_var, int th,
    3228              :                  bool check_profitability, bool niters_no_overflow,
    3229              :                  tree *advance)
    3230              : {
    3231        61418 :   edge e, guard_e;
    3232        61418 :   tree type = niters ? TREE_TYPE (niters) : sizetype;
    3233        61418 :   tree guard_cond;
    3234        61418 :   basic_block guard_bb, guard_to;
    3235        61418 :   profile_probability prob_prolog, prob_vector, prob_epilog;
    3236        61418 :   int estimated_vf;
    3237        61418 :   int prolog_peeling = 0;
    3238        61418 :   bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
    3239        61418 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    3240              : 
    3241        61418 :   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
    3242        61417 :     prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    3243              : 
    3244        61418 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    3245        61418 :   poly_uint64 bound_epilog = 0;
    3246        61418 :   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    3247        61400 :       && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    3248        32410 :     bound_epilog += vf - 1;
    3249        61418 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    3250          372 :     bound_epilog += 1;
    3251              : 
    3252              :   /* For early breaks the scalar loop needs to execute at most VF times
    3253              :      to find the element that caused the break.  */
    3254        61418 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3255         1406 :     bound_epilog = vf;
    3256              : 
    3257        61418 :   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
    3258        61418 :   poly_uint64 bound_scalar = bound_epilog;
    3259              : 
    3260        61418 :   if (!prolog_peeling && !epilog_peeling)
    3261              :     return NULL;
    3262              : 
    3263              :   /* Before doing any peeling make sure to reset debug binds outside of
    3264              :      the loop referring to defs not in LC SSA.  */
    3265        33373 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    3266       101966 :   for (unsigned i = 0; i < loop->num_nodes; ++i)
    3267              :     {
    3268        68593 :       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
    3269        68593 :       imm_use_iterator ui;
    3270        68593 :       gimple *use_stmt;
    3271       162514 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    3272        93921 :            gsi_next (&gsi))
    3273              :         {
    3274       391365 :           FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
    3275       223656 :             if (gimple_debug_bind_p (use_stmt)
    3276        20133 :                 && loop != gimple_bb (use_stmt)->loop_father
    3277        20149 :                 && !flow_loop_nested_p (loop,
    3278           16 :                                         gimple_bb (use_stmt)->loop_father))
    3279              :               {
    3280            2 :                 gimple_debug_bind_reset_value (use_stmt);
    3281            2 :                 update_stmt (use_stmt);
    3282        93921 :               }
    3283              :         }
    3284       639694 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    3285       502508 :            gsi_next (&gsi))
    3286              :         {
    3287       502508 :           ssa_op_iter op_iter;
    3288       502508 :           def_operand_p def_p;
    3289       815221 :           FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
    3290      1094295 :             FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
    3291       505733 :               if (gimple_debug_bind_p (use_stmt)
    3292        36864 :                   && loop != gimple_bb (use_stmt)->loop_father
    3293        36891 :                   && !flow_loop_nested_p (loop,
    3294           27 :                                           gimple_bb (use_stmt)->loop_father))
    3295              :                 {
    3296            0 :                   gimple_debug_bind_reset_value (use_stmt);
    3297            0 :                   update_stmt (use_stmt);
    3298       312713 :                 }
    3299              :         }
    3300              :     }
    3301              : 
    3302        33373 :   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
    3303        33373 :   estimated_vf = vect_vf_for_cost (loop_vinfo);
    3304        33373 :   if (estimated_vf == 2)
    3305         6610 :     estimated_vf = 3;
    3306        33373 :   prob_prolog = prob_epilog = profile_probability::guessed_always ()
    3307        33373 :                         .apply_scale (estimated_vf - 1, estimated_vf);
    3308              : 
    3309        33373 :   class loop *prolog = NULL, *epilog = NULL;
    3310        33373 :   class loop *first_loop = loop;
    3311        33373 :   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    3312              : 
    3313              :   /* SSA form needs to be up-to-date since we are going to manually
    3314              :      update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
    3315              :      update SSA state after that, so we have to make sure to not lose any
    3316              :      pending update needs.  */
    3317        33373 :   gcc_assert (!need_ssa_update_p (cfun));
    3318              : 
    3319              :   /* If we're vectorizing an epilogue loop, we have ensured that the
    3320              :      virtual operand is in SSA form throughout the vectorized main loop.
    3321              :      Normally it is possible to trace the updated
    3322              :      vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
    3323              :      back to scalar-stmt vuses, meaning that the effect of the SSA update
    3324              :      remains local to the main loop.  However, there are rare cases in
    3325              :      which the vectorized loop should have vdefs even when the original scalar
    3326              :      loop didn't.  For example, vectorizing a load with IFN_LOAD_LANES
    3327              :      introduces clobbers of the temporary vector array, which in turn
    3328              :      needs new vdefs.  If the scalar loop doesn't write to memory, these
    3329              :      new vdefs will be the only ones in the vector loop.
     3330              :      We are currently deferring updating virtual SSA form and creating
    3331              :      of a virtual PHI for this case so we do not have to make sure the
    3332              :      newly introduced virtual def is in LCSSA form.  */
    3333              : 
    3334        33373 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
    3335              :     {
    3336        12395 :       gcc_assert (!adjust_vec.exists ());
    3337        12395 :       adjust_vec.create (32);
    3338              :     }
    3339        33373 :   initialize_original_copy_tables ();
    3340              : 
    3341              :   /* Record the anchor bb at which the guard should be placed if the scalar
    3342              :      loop might be preferred.  */
    3343        33373 :   basic_block anchor = loop_preheader_edge (loop)->src;
    3344              : 
    3345              :   /* Generate the number of iterations for the prolog loop.  We do this here
    3346              :      so that we can also get the upper bound on the number of iterations.  */
    3347        33373 :   tree niters_prolog;
    3348        33373 :   poly_int64 bound_prolog = 0;
    3349        33373 :   if (prolog_peeling)
    3350              :     {
    3351          438 :       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
    3352              :                                                    &bound_prolog);
     3353              :       /* If alignment peeling is known, we will always execute prolog.  */
    3354          438 :       if (TREE_CODE (niters_prolog) == INTEGER_CST)
    3355          226 :         prob_prolog = profile_probability::always ();
    3356              :     }
    3357              :   else
    3358        32935 :     niters_prolog = build_int_cst (type, 0);
    3359              : 
    3360        33373 :   loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
    3361        33373 :   tree niters_vector_mult_vf = NULL_TREE;
    3362              :   /* Saving NITERs before the loop, as this may be changed by prologue.  */
    3363        33373 :   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
    3364        33373 :   edge update_e = NULL, skip_e = NULL;
    3365        33373 :   unsigned int lowest_vf = constant_lower_bound (vf);
    3366              :   /* Prolog loop may be skipped.  */
    3367        33373 :   bool skip_prolog = (prolog_peeling != 0);
    3368              :   /* Skip this loop to epilog when there are not enough iterations to enter this
    3369              :      vectorized loop.  If true we should perform runtime checks on the NITERS
    3370              :      to check whether we should skip the current vectorized loop.  If we know
    3371              :      the number of scalar iterations we may choose to add a runtime check if
    3372              :      this number "maybe" smaller than the number of iterations required
    3373              :      when we know the number of scalar iterations may potentially
    3374              :      be smaller than the number of iterations required to enter this loop, for
    3375              :      this we use the upper bounds on the prolog and epilog peeling.  When we
    3376              :      don't know the number of iterations and don't require versioning it is
    3377              :      because we have asserted that there are enough scalar iterations to enter
    3378              :      the main loop, so this skip is not necessary.  When we are versioning then
    3379              :      we only add such a skip if we have chosen to vectorize the epilogue.  */
    3380        33373 :   bool skip_vector = false;
    3381        33373 :   if (!uncounted_p)
    3382         8516 :     skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3383        41846 :                    ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
    3384         8516 :                                bound_prolog + bound_epilog)
    3385        24837 :                    : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
    3386           38 :                       || vect_epilogues));
    3387              : 
    3388              :   /* Epilog loop must be executed if the number of iterations for epilog
    3389              :      loop is known at compile time, otherwise we need to add a check at
    3390              :      the end of vector loop and skip to the end of epilog loop.  */
    3391        33373 :   bool skip_epilog = (prolog_peeling < 0
    3392        33161 :                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3393        33373 :                       || !vf.is_constant ());
    3394              :   /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog
    3395              :      loop must be executed.  */
    3396        33373 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
    3397        33001 :       || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3398         1778 :     skip_epilog = false;
    3399              : 
    3400        33373 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3401        33373 :   auto_vec<profile_count> original_counts;
    3402        33373 :   basic_block *original_bbs = NULL;
    3403              : 
    3404        33373 :   if (skip_vector)
    3405              :     {
    3406        24799 :       split_edge (loop_preheader_edge (loop));
    3407              : 
    3408        24799 :       if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
    3409              :         {
    3410        22585 :           original_bbs = get_loop_body (loop);
    3411        68178 :           for (unsigned int i = 0; i < loop->num_nodes; i++)
    3412        45593 :             original_counts.safe_push(original_bbs[i]->count);
    3413              :         }
    3414              : 
    3415              :       /* Due to the order in which we peel prolog and epilog, we first
    3416              :          propagate probability to the whole loop.  The purpose is to
    3417              :          avoid adjusting probabilities of both prolog and vector loops
    3418              :          separately.  Note in this case, the probability of epilog loop
    3419              :          needs to be scaled back later.  */
    3420        24799 :       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
    3421        24799 :       if (prob_vector.initialized_p ())
    3422              :         {
    3423        24799 :           scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
    3424        24799 :           scale_loop_profile (loop, prob_vector, -1);
    3425              :         }
    3426              :     }
    3427              : 
    3428        33373 :   if (vect_epilogues)
    3429              :     {
    3430              :       /* Make sure to set the epilogue's epilogue scalar loop, such that we can
    3431              :          use the original scalar loop as remaining epilogue if necessary.  */
    3432         7056 :       LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
    3433         7056 :         = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3434         7056 :       LOOP_VINFO_SCALAR_MAIN_EXIT (epilogue_vinfo)
    3435         7056 :         = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3436              :     }
    3437              : 
    3438        33373 :   if (prolog_peeling)
    3439              :     {
    3440          438 :       e = loop_preheader_edge (loop);
    3441          438 :       edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3442          438 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)
    3443              :                            && (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3444              :                                || uncounted_p));
    3445              : 
    3446              :       /* Peel prolog and put it on preheader edge of loop.  */
    3447          438 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3448          438 :       edge prolog_e = NULL;
    3449          438 :       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
    3450              :                                                        scalar_loop, scalar_e,
    3451              :                                                        e, &prolog_e, true, NULL,
    3452              :                                                        false, uncounted_p);
    3453              : 
    3454          438 :       gcc_assert (prolog);
    3455          438 :       prolog->force_vectorize = false;
    3456              : 
    3457              :       /* Assign hierarchical discriminators to distinguish prolog loop.  */
    3458          438 :       gimple *prolog_last = last_nondebug_stmt (prolog->header);
    3459          438 :       location_t prolog_loc
    3460          438 :         = prolog_last ? gimple_location (prolog_last) : UNKNOWN_LOCATION;
    3461          438 :       if (prolog_loc != UNKNOWN_LOCATION)
    3462              :         {
    3463          436 :           unsigned int prolog_copyid = allocate_copyid_base (prolog_loc, 1);
    3464          436 :           assign_discriminators_to_loop (prolog, 0, prolog_copyid);
    3465              :         }
    3466          438 :       first_loop = prolog;
    3467          438 :       reset_original_copy_tables ();
    3468              : 
    3469              :       /* Update the number of iterations for prolog loop.  */
    3470          438 :       tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
    3471          438 :       vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
    3472              :                                step_prolog, NULL_TREE, false);
    3473              : 
    3474              :       /* Skip the prolog loop.  */
    3475          438 :       if (skip_prolog)
    3476              :         {
    3477          438 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3478              :                                     niters_prolog, build_int_cst (type, 0));
    3479          438 :           guard_bb = loop_preheader_edge (prolog)->src;
    3480          438 :           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
    3481          438 :           guard_to = split_edge (loop_preheader_edge (loop));
    3482          438 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3483              :                                            guard_to, guard_bb,
    3484              :                                            prob_prolog.invert (),
    3485              :                                            irred_flag);
    3486         2008 :           for (edge alt_e : get_loop_exit_edges (prolog))
    3487              :             {
    3488          694 :               if (alt_e == prolog_e)
    3489          438 :                 continue;
    3490          256 :               basic_block old_dom
    3491          256 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    3492          256 :               if (flow_bb_inside_loop_p (prolog, old_dom))
    3493              :                 {
    3494          101 :                   auto_vec<basic_block, 8> queue;
    3495          101 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    3496          325 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    3497          224 :                     if (!flow_bb_inside_loop_p (prolog, son))
    3498          123 :                       queue.safe_push (son);
    3499          426 :                   for (auto son : queue)
    3500          123 :                     set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
    3501          101 :                 }
    3502          438 :             }
    3503              : 
    3504          438 :           e = EDGE_PRED (guard_to, 0);
    3505          438 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3506          438 :           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
    3507              : 
    3508          438 :           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
    3509          438 :           scale_loop_profile (prolog, prob_prolog,
    3510          438 :                               estimated_poly_value (bound_prolog) - 1);
    3511              :         }
    3512              : 
    3513              :       /* Update init address of DRs.  */
    3514          438 :       vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
    3515          438 :       if (!uncounted_p)
    3516              :         {
    3517              :           /* Update niters for vector loop.  */
    3518          407 :           LOOP_VINFO_NITERS (loop_vinfo)
    3519          407 :             = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
    3520          407 :           LOOP_VINFO_NITERSM1 (loop_vinfo)
    3521          407 :             = fold_build2 (MINUS_EXPR, type,
    3522              :                            LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
    3523              :         }
    3524          438 :       bool new_var_p = false;
    3525          438 :       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
    3526              :       /* It's guaranteed that vector loop bound before vectorization is at
    3527              :          least VF, so set range information for newly generated var.  */
    3528          438 :       if (new_var_p)
    3529              :         {
    3530          222 :           int_range<1> vr (type,
    3531          444 :                            wi::to_wide (build_int_cst (type, lowest_vf)),
    3532          444 :                            wi::to_wide (TYPE_MAX_VALUE (type)));
    3533          222 :           set_range_info (niters, vr);
    3534          222 :         }
    3535              : 
    3536              :       /* Prolog iterates at most bound_prolog times, latch iterates at
    3537              :          most bound_prolog - 1 times.  */
    3538          438 :       if (bound_prolog.is_constant ())
    3539          438 :         record_niter_bound (prolog, bound_prolog.to_constant () - 1, false,
    3540              :                             true);
    3541          438 :       delete_update_ssa ();
    3542          438 :       adjust_vec_debug_stmts ();
    3543          438 :       scev_reset ();
    3544              :     }
    3545        33373 :   basic_block bb_before_epilog = NULL;
    3546              : 
    3547        33373 :   if (epilog_peeling)
    3548              :     {
    3549        33342 :       e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3550        33342 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
    3551              : 
    3552              :       /* Peel epilog and put it on exit edge of loop.  If we are vectorizing
    3553              :          said epilog then we should use a copy of the main loop as a starting
    3554              :          point.  This loop may have already had some preliminary transformations
    3555              :          to allow for more optimal vectorization, for example if-conversion.
    3556              :          If we are not vectorizing the epilog then we should use the scalar loop
    3557              :          as the transformations mentioned above make less or no sense when not
    3558              :          vectorizing.  */
    3559        33342 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3560        33342 :       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
    3561         7056 :       edge epilog_e = vect_epilogues ? e : scalar_e;
    3562        33342 :       edge new_epilog_e = NULL;
    3563        33342 :       auto_vec<basic_block> doms;
    3564        33342 :       epilog
    3565        33342 :         = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
    3566              :                                                   &new_epilog_e, true, &doms,
    3567              :                                                   uncounted_p);
    3568              : 
    3569        33342 :       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
    3570        33342 :       gcc_assert (epilog);
    3571        33342 :       gcc_assert (new_epilog_e);
    3572        33342 :       epilog->force_vectorize = false;
    3573        33342 :       bb_before_epilog = loop_preheader_edge (epilog)->src;
    3574              : 
    3575              :       /* Assign hierarchical discriminators to distinguish epilog loop.
    3576              :          Only assign if it's a scalar epilog.  If it will be vectorized
    3577              :          (vect_epilogues), discriminators will be assigned.
    3578              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    3579        33342 :       if (!vect_epilogues)
    3580              :         {
    3581        26286 :           gimple *epilog_last = last_nondebug_stmt (epilog->header);
    3582        26286 :           location_t epilog_loc
    3583        26286 :             = epilog_last ? gimple_location (epilog_last) : UNKNOWN_LOCATION;
    3584        26257 :           if (epilog_loc != UNKNOWN_LOCATION)
    3585              :             {
    3586        21424 :               unsigned int epilog_copyid = allocate_copyid_base (epilog_loc, 1);
    3587        21424 :               assign_discriminators_to_loop (epilog, 0, epilog_copyid);
    3588              :             }
    3589              :         }
    3590              : 
    3591              :       /* Scalar version loop may be preferred.  In this case, add guard
    3592              :          and skip to epilog.  Note this only happens when the number of
    3593              :          iterations of loop is unknown at compile time, otherwise this
    3594              :          won't be vectorized.  */
    3595        33342 :       if (skip_vector)
    3596              :         {
    3597              :           /* Additional epilogue iteration is peeled if gap exists.  */
    3598        24799 :           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
    3599        24799 :                                                 bound_prolog, bound_epilog,
    3600              :                                                 th, &bound_scalar,
    3601              :                                                 check_profitability);
    3602              :           /* Build guard against NITERSM1 since NITERS may overflow.  */
    3603        24799 :           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
    3604        24799 :           guard_bb = anchor;
    3605        24799 :           guard_to = split_edge (loop_preheader_edge (epilog));
    3606        24799 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3607              :                                            guard_to, guard_bb,
    3608              :                                            prob_vector.invert (),
    3609              :                                            irred_flag);
    3610        24799 :           skip_e = guard_e;
    3611        24799 :           e = EDGE_PRED (guard_to, 0);
    3612        24799 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3613              : 
    3614              :           /* Handle any remaining dominator updates needed after
    3615              :              inserting the loop skip edge above.  */
    3616        24799 :           if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3617          286 :               && prolog_peeling)
    3618              :             {
    3619              :               /* Adding a skip edge to skip a loop with multiple exits
    3620              :                  means the dominator of the join blocks for all exits shifts
    3621              :                  from the prolog skip guard to the loop skip guard.  */
    3622          163 :               auto prolog_skip_bb
    3623          163 :                 = single_pred (loop_preheader_edge (prolog)->src);
    3624          163 :               auto needs_update
    3625          163 :                 = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb);
    3626              : 
    3627              :               /* Update everything except for the immediate children of
    3628              :                  the prolog skip block (the prolog and vector preheaders).
    3629              :                  Those should remain dominated by the prolog skip block itself,
    3630              :                  since the loop guard edge goes to the epilogue.  */
    3631          866 :               for (auto bb : needs_update)
    3632          377 :                 if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest
    3633          377 :                     && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest)
    3634           51 :                   set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb);
    3635          163 :             }
    3636              : 
    3637        24799 :           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
    3638              : 
    3639              :           /* Simply propagate profile info from guard_bb to guard_to which is
    3640              :              a merge point of control flow.  */
    3641        24799 :           profile_count old_count = guard_to->count;
    3642        24799 :           guard_to->count = guard_bb->count;
    3643              : 
    3644              :           /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
    3645        24799 :           if (vect_epilogues || scalar_loop == NULL)
    3646              :             {
    3647        22585 :               gcc_assert(epilog->num_nodes == loop->num_nodes);
    3648        22585 :               basic_block *bbs = get_loop_body (epilog);
    3649        68178 :               for (unsigned int i = 0; i < epilog->num_nodes; i++)
    3650              :                 {
    3651        45593 :                   gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]);
    3652        45593 :                   bbs[i]->count = original_counts[i];
    3653              :                 }
    3654        22585 :               free (bbs);
    3655        22585 :               free (original_bbs);
    3656              :             }
    3657         2214 :           else if (old_count.nonzero_p ())
    3658         2214 :             scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
    3659              : 
    3660              :           /* Only need to handle basic block before epilog loop if it's not
    3661              :              the guard_bb, which is the case when skip_vector is true.  */
    3662        24799 :           if (guard_bb != bb_before_epilog && single_pred_p (bb_before_epilog))
    3663        24513 :             bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
    3664        24799 :           bb_before_epilog = loop_preheader_edge (epilog)->src;
    3665              :         }
    3666              : 
    3667        33342 :       if (!uncounted_p)
    3668              :         {
    3669              :           /* If loop is peeled for non-zero constant times, now niters refers to
    3670              :              orig_niters - prolog_peeling, it won't overflow even the
    3671              :              orig_niters overflows.  */
    3672        33299 :           niters_no_overflow |= (prolog_peeling > 0);
    3673        33299 :           vect_gen_vector_loop_niters (loop_vinfo, niters,
    3674              :                                        niters_vector, step_vector,
    3675              :                                        niters_no_overflow);
    3676        33299 :           if (!integer_onep (*step_vector))
    3677              :             {
    3678              :               /* On exit from the loop we will have an easy way of calcalating
    3679              :                  NITERS_VECTOR / STEP * STEP.  Install a dummy definition
    3680              :                  until then.  */
    3681            0 :               niters_vector_mult_vf
    3682            0 :                 = make_ssa_name (TREE_TYPE (*niters_vector));
    3683            0 :               SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
    3684            0 :               *niters_vector_mult_vf_var = niters_vector_mult_vf;
    3685              :             }
    3686              :           else
    3687        33299 :             vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
    3688              :                                                  &niters_vector_mult_vf);
    3689              :           /* Update IVs of original loop as if they were advanced by
    3690              :              niters_vector_mult_vf steps.  */
    3691        33299 :           gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
    3692        33299 :           update_e = skip_vector ? e : loop_preheader_edge (epilog);
    3693              :         }
    3694        33342 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3695         1406 :         update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest);
    3696              : 
    3697              :       /* If we have a peeled vector iteration we will never skip the epilog loop
    3698              :          and we can simplify the cfg a lot by not doing the edge split.  */
    3699        33342 :       if (skip_epilog
    3700        33342 :           || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3701         1406 :               && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
    3702              :         {
    3703        25690 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3704              :                                     niters, niters_vector_mult_vf);
    3705              : 
    3706        25690 :           guard_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3707        25690 :           edge epilog_e = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3708        25690 :           guard_to = epilog_e->dest;
    3709        26761 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
    3710              :                                            skip_vector ? anchor : guard_bb,
    3711              :                                            prob_epilog.invert (),
    3712              :                                            irred_flag);
    3713              : 
    3714        25690 :           doms.safe_push (guard_to);
    3715        25690 :           if (vect_epilogues)
    3716         5655 :             epilogue_vinfo->skip_this_loop_edge = guard_e;
    3717        25690 :           edge main_iv = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3718        25690 :           gphi_iterator gsi2 = gsi_start_phis (main_iv->dest);
    3719        25690 :           for (gphi_iterator gsi = gsi_start_phis (guard_to);
    3720        62253 :                !gsi_end_p (gsi); gsi_next (&gsi))
    3721              :             {
    3722              :               /* We are expecting all of the PHIs we have on epilog_e
    3723              :                  to be also on the main loop exit.  But sometimes
    3724              :                  a stray virtual definition can appear at epilog_e
    3725              :                  which we can then take as the same on all exits,
    3726              :                  we've removed the LC SSA PHI on the main exit before
    3727              :                  so we wouldn't need to create a loop PHI for it.  */
    3728        36563 :               if (virtual_operand_p (gimple_phi_result (*gsi))
    3729        36563 :                   && (gsi_end_p (gsi2)
    3730        36356 :                       || !virtual_operand_p (gimple_phi_result (*gsi2))))
    3731          180 :                 add_phi_arg (*gsi,
    3732           90 :                              gimple_phi_arg_def_from_edge (*gsi, epilog_e),
    3733              :                              guard_e, UNKNOWN_LOCATION);
    3734              :               else
    3735              :                 {
    3736        36473 :                   add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
    3737              :                                UNKNOWN_LOCATION);
    3738        36473 :                   gsi_next (&gsi2);
    3739              :                 }
    3740              :             }
    3741              : 
    3742              :           /* Only need to handle basic block before epilog loop if it's not
    3743              :              the guard_bb, which is the case when skip_vector is true.  */
    3744        25690 :           if (guard_bb != bb_before_epilog)
    3745              :             {
    3746        25687 :               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
    3747              : 
    3748        25687 :               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
    3749              :             }
    3750        25690 :           scale_loop_profile (epilog, prob_epilog, -1);
    3751              :         }
    3752              : 
    3753              :       /* If we have a peeled vector iteration, all exits are the same, leave it
    3754              :          and so the main exit needs to be treated the same as the alternative
    3755              :          exits in that we leave their updates to vectorizable_live_operations.
    3756              :          */
    3757        33342 :       tree vector_iters_vf = niters_vector_mult_vf;
    3758        33342 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3759              :         {
    3760         1406 :           tree tmp_niters_vf
    3761         1406 :             = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
    3762              : 
    3763         1449 :           if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
    3764         1463 :                 && get_loop_exit_edges (loop).length () == 1))
    3765              :           {
    3766         1377 :             basic_block exit_bb = NULL;
    3767         1377 :             edge update_e = NULL;
    3768              : 
    3769              :             /* Identify the early exit merge block.  I wish we had stored
    3770              :                this.  */
    3771         4131 :             for (auto e : get_loop_exit_edges (loop))
    3772         1377 :               if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo))
    3773              :                 {
    3774         1377 :                   exit_bb = e->dest;
    3775         1377 :                   update_e = single_succ_edge (exit_bb);
    3776         1377 :                   break;
    3777         1377 :                 }
    3778         1377 :             vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
    3779              :                                               update_e, true);
    3780              :           }
    3781         1406 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3782              :             vector_iters_vf = tmp_niters_vf;
    3783              : 
    3784         1406 :           LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
    3785              :         }
    3786              : 
    3787        33342 :         bool recalculate_peel_niters_init
    3788        33342 :           = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
    3789        33342 :         vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
    3790              :                                           update_e,
    3791              :                                           recalculate_peel_niters_init);
    3792              : 
    3793              :       /* Recalculate the dominators after adding the guard edge.  */
    3794        33342 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3795         1406 :         iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    3796              : 
    3797              :       /* When we do not have a loop-around edge to the epilog we know
    3798              :          the vector loop covered at least VF scalar iterations unless
    3799              :          we have early breaks.
    3800              :          Update any known upper bound with this knowledge.  */
    3801        33342 :       if (! skip_vector
    3802         8543 :           && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3803              :         {
    3804         7423 :           if (epilog->any_upper_bound)
    3805         7423 :             epilog->nb_iterations_upper_bound -= lowest_vf;
    3806         7423 :           if (epilog->any_likely_upper_bound)
    3807         7423 :             epilog->nb_iterations_likely_upper_bound -= lowest_vf;
    3808         7423 :           if (epilog->any_estimate)
    3809         7421 :             epilog->nb_iterations_estimate -= lowest_vf;
    3810              :         }
    3811              : 
    3812        33342 :       unsigned HOST_WIDE_INT bound;
    3813        33342 :       if (bound_scalar.is_constant (&bound))
    3814              :         {
    3815        33342 :           gcc_assert (bound != 0);
    3816              :           /* Adjust the upper bound by the extra peeled vector iteration if we
    3817              :              are an epilogue of an peeled vect loop and not VLA.  For VLA the
    3818              :              loop bounds are unknown.  */
    3819        66665 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3820        33342 :               && vf.is_constant ())
    3821           62 :             bound += vf.to_constant ();
    3822              :           /* -1 to convert loop iterations to latch iterations.  */
    3823        33342 :           record_niter_bound (epilog, bound - 1, false, true);
    3824        33342 :           scale_loop_profile (epilog, profile_probability::always (),
    3825              :                               bound - 1);
    3826              :         }
    3827              : 
    3828        33342 :       delete_update_ssa ();
    3829        33342 :       adjust_vec_debug_stmts ();
    3830        33342 :       scev_reset ();
    3831        33342 :     }
    3832              : 
    3833        33373 :   if (vect_epilogues)
    3834              :     {
    3835         7056 :       epilog->aux = epilogue_vinfo;
    3836         7056 :       LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
    3837         7056 :       LOOP_VINFO_MAIN_EXIT (epilogue_vinfo)
    3838         7056 :         = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3839              : 
    3840         7056 :       loop_constraint_clear (epilog, LOOP_C_INFINITE);
    3841              : 
    3842              :       /* We now must calculate the number of NITERS performed by the previous
    3843              :          loop and EPILOGUE_NITERS to be performed by the epilogue.  */
    3844         7056 :       tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
    3845              :                                  niters_prolog, niters_vector_mult_vf);
    3846              : 
    3847              :       /* If skip_vector we may skip the previous loop, we insert a phi-node to
    3848              :          determine whether we are coming from the previous vectorized loop
    3849              :          using the update_e edge or the skip_vector basic block using the
    3850              :          skip_e edge.  */
    3851         7056 :       if (skip_vector)
    3852              :         {
    3853         5725 :           gcc_assert (update_e != NULL && skip_e != NULL);
    3854         5725 :           gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
    3855              :                                            update_e->dest);
    3856         5725 :           tree new_ssa = make_ssa_name (TREE_TYPE (niters));
    3857         5725 :           gimple *stmt = gimple_build_assign (new_ssa, niters);
    3858         5725 :           gimple_stmt_iterator gsi;
    3859         5725 :           if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
    3860         5725 :               && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
    3861              :             {
    3862         5725 :               gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
    3863         5725 :               gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    3864              :             }
    3865              :           else
    3866              :             {
    3867            0 :               gsi = gsi_last_bb (update_e->src);
    3868            0 :               gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
    3869              :             }
    3870              : 
    3871         5725 :           niters = new_ssa;
    3872         5725 :           add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
    3873         5725 :           add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
    3874              :                        UNKNOWN_LOCATION);
    3875         5725 :           niters = PHI_RESULT (new_phi);
    3876         5725 :           epilogue_vinfo->main_loop_edge = update_e;
    3877         5725 :           epilogue_vinfo->skip_main_loop_edge = skip_e;
    3878              :         }
    3879              : 
    3880              :       /* Set ADVANCE to the number of iterations performed by the previous
    3881              :          loop and its prologue.  */
    3882         7056 :       *advance = niters;
    3883              : 
    3884              :       /* Subtract the number of iterations performed by the vectorized loop
    3885              :          from the number of total iterations.  */
    3886         7056 :       tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
    3887              :                                           before_loop_niters,
    3888              :                                           niters);
    3889              : 
    3890         7056 :       LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
    3891         7056 :       LOOP_VINFO_NITERSM1 (epilogue_vinfo)
    3892         7056 :         = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
    3893              :                        epilogue_niters,
    3894              :                        build_one_cst (TREE_TYPE (epilogue_niters)));
    3895              :     }
    3896              : 
    3897        33373 :   adjust_vec.release ();
    3898        33373 :   free_original_copy_tables ();
    3899              : 
    3900        33373 :   return vect_epilogues ? epilog : NULL;
    3901        33373 : }
    3902              : 
    3903              : /* Function vect_create_cond_for_niters_checks.
    3904              : 
    3905              :    Create a conditional expression that represents the run-time checks for
    3906              :    loop's niter.  The loop is guaranteed to terminate if the run-time
    3907              :    checks hold.
    3908              : 
    3909              :    Input:
    3910              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3911              :                 with logical AND operation.  If it is NULL, then the function
    3912              :                 is used to return the number of alias checks.
    3913              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    3914              :                 to be checked.
    3915              : 
    3916              :    Output:
    3917              :    COND_EXPR - conditional expression.
    3918              : 
    3919              :    The returned COND_EXPR is the conditional expression to be used in the
    3920              :    if statement that controls which version of the loop gets executed at
    3921              :    runtime.  */
    3922              : 
    3923              : static void
    3924          388 : vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
    3925              : {
    3926          388 :   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
    3927              : 
    3928          388 :   if (*cond_expr)
    3929          388 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    3930              :                               *cond_expr, part_cond_expr);
    3931              :   else
    3932            0 :     *cond_expr = part_cond_expr;
    3933          388 : }
    3934              : 
    3935              : /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    3936              :    and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
    3937              : 
    3938              : static void
    3939          349 : chain_cond_expr (tree *cond_expr, tree part_cond_expr)
    3940              : {
    3941          349 :   if (*cond_expr)
    3942          345 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    3943              :                               *cond_expr, part_cond_expr);
    3944              :   else
    3945            4 :     *cond_expr = part_cond_expr;
    3946          349 : }
    3947              : 
    3948              : /* Function vect_create_cond_for_align_checks.
    3949              : 
    3950              :    Create a conditional expression that represents the alignment checks for
    3951              :    all of data references (array element references) whose alignment must be
    3952              :    checked at runtime.
    3953              : 
    3954              :    Input:
    3955              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3956              :                 with logical AND operation.
    3957              :    LOOP_VINFO - three fields of the loop information are used.
    3958              :                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
    3959              :                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
    3960              :                 LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT indicates which check applies.
    3961              : 
    3962              :    Output:
    3963              :    COND_EXPR_STMT_LIST - statements needed to construct the conditional
    3964              :                          expression.
    3965              :    The returned value is the conditional expression to be used in the if
    3966              :    statement that controls which version of the loop gets executed at runtime.
    3967              : 
    3968              :    Based on the boolean value of LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT, we decide
    3969              :    which type of check should be applied and create two different expressions
    3970              :    accordingly.
    3971              :      1) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is false, we see if all data refs
    3972              :         to be checked are already aligned to an alignment boundary.  We create
    3973              :         an expression of "(a_1 | a_2 | a_3 | ... | a_n) & mask", where "a_i" is
    3974              :         the address of i'th data reference.
    3975              :      2) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we see if all data refs
    3976              :         can be aligned to a boundary after a certain amount of peeling, in other
    3977              :         words, their addresses have the same bottom bits according to the mask.
    3978              :         We create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask",
    3979              :         where "a_i" is the address of i'th data reference.
    3980              : 
    3981              :    Both algorithms make two assumptions:
    3982              :      1) The number of bytes "n" in a vector is a power of 2.
    3983              :      2) An address "a" is aligned if a%n is zero and that this
    3984              :         test can be done as a&(n-1) == 0.  For example, for 16
    3985              :         byte vectors the test is a&0xf == 0.  */
    3986              : 
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
                                   gimple_seq *cond_expr_stmt_list)
{
  const vec<stmt_vec_info> &may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  stmt_vec_info stmt_info;
  poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  /* Buffer for the made-up SSA temp names below ("addr2int%d" etc.);
     30 chars is ample for the longest prefix plus two decimal indices.  */
  char tmp_name[30];
  /* Accumulated OR of either the addresses themselves (alignment check)
     or of pairwise XORs (mutual-alignment check).  */
  tree or_tmp_name = NULL_TREE;
  /* Address temp of the previous data ref, used to form the pairwise XOR
     in the mutual-alignment variant.  */
  tree prev_addr_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* A zero mask would make the check trivially true; callers must not
     request versioning in that case.  */
  gcc_assert (known_ne (mask, 0U));

  /* All address arithmetic below is done in a signed pointer-sized
     integer type.  */
  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* If LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we should have at least two
     datarefs to check the mutual alignment.  */
  gcc_assert (may_misalign_stmts.length () > 1
              || !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo));

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree xor_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt, *xor_stmt;
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      /* For a negative-step access the vector is addressed from its last
         element, so point OFFSET at the lowest accessed address.  */
      bool negative = tree_int_cst_compare
        (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
      tree offset = negative
        ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
                    * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
        : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
        vect_create_addr_base_for_vector_ref (loop_vinfo,
                                              stmt_info, &new_stmt_list,
                                              offset);
      if (new_stmt_list != NULL)
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      if (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo))
        {
          /* Create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask"
             to check mutual alignment.  */
          if (prev_addr_tmp_name != NULL_TREE)
            {
              /* XOR the current address with the previous one; any bit set
                 within the mask means the two refs cannot be mutually
                 aligned by peeling.  */
              sprintf (tmp_name, "xorptrs%d_%d", i - 1, i);
              xor_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
                                                 tmp_name);
              xor_stmt = gimple_build_assign (xor_tmp_name, BIT_XOR_EXPR,
                                              prev_addr_tmp_name,
                                              addr_tmp_name);
              gimple_seq_add_stmt (cond_expr_stmt_list, xor_stmt);
              if (or_tmp_name == NULL_TREE)
                {
                  /* Create the 1st XOR when the 2nd data ref is seen.  */
                  or_tmp_name = xor_tmp_name;
                }
              else
                {
                  /* Create: or_tmp = or_tmp | new_xor_tmp.  */
                  sprintf (tmp_name, "orxors%d", i - 1);
                  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
                                                        tmp_name);
                  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
                                                 or_tmp_name, xor_tmp_name);
                  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
                  or_tmp_name = new_or_tmp_name;
                }
            }
          prev_addr_tmp_name = addr_tmp_name;
        }
      else
        {
          /* Create: "(a_1 | a_2 | a_3 | ... | a_n) & mask" to check if all
             addresses are already aligned.  */
          if (or_tmp_name != NULL_TREE)
            {
              /* Create: or_tmp = or_tmp | addr_tmp.  */
              sprintf (tmp_name, "orptrs%d", i);
              new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
                                                    tmp_name);
              or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
                                             or_tmp_name, addr_tmp_name);
              gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
              or_tmp_name = new_or_tmp_name;
            }
          else
            or_tmp_name = addr_tmp_name;
        }

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
                                  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
                                and_tmp_name, ptrsize_zero);
  chain_cond_expr (cond_expr, part_cond_expr);
}
    4114              : 
    4115              : /* Function vect_create_cond_for_vla_spec_read.
    4116              : 
    4117              :    Create a conditional expression that represents the run-time checks with
    4118              :    max speculative read amount in VLA modes.  We check two things:
    4119              :      1) if the max speculative read amount exceeds the min page size
    4120              :      2) if the VF is power-of-2 - done by checking the max read amount instead
    4121              : 
    4122              :    Input:
    4123              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4124              :                 with logical AND operation.
    4125              :    LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
    4126              :                 possible speculative read amount in VLA modes.
    4127              : 
    4128              :    Output:
    4129              :    COND_EXPR - conditional expression.
    4130              : 
    4131              :    The returned COND_EXPR is the conditional expression to be used in the
    4132              :    if statement that controls which version of the loop gets executed at
    4133              :    runtime.  */
    4134              : 
    4135              : static void
    4136            0 : vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
    4137              : {
    4138            0 :   poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
    4139            0 :   tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
    4140              : 
    4141              :   /* Both the read amount and the VF must be variants, and the read amount must
    4142              :      be a constant power-of-2 multiple of the VF.  */
    4143            0 :   unsigned HOST_WIDE_INT multiple;
    4144            0 :   gcc_assert (!read_amount_poly.is_constant ()
    4145              :               && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    4146              :               && constant_multiple_p (read_amount_poly,
    4147              :                                       LOOP_VINFO_VECT_FACTOR (loop_vinfo),
    4148              :                                       &multiple)
    4149              :               && pow2p_hwi (multiple));
    4150              : 
    4151              :   tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
    4152              :   tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
    4153              :   tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
    4154              :                                          (unsigned long) param_min_pagesize);
    4155              : 
    4156              :   /* Create an expression of "amount & (amount - 1) == 0".  */
    4157              :   tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
    4158              :                                 amount, cst_ul_one);
    4159              :   tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
    4160              :                                       amount, amount_m1);
    4161              :   tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    4162              :                                        amount_and_expr, cst_ul_zero);
    4163              :   chain_cond_expr (cond_expr, powof2_cond_expr);
    4164              : 
    4165              :   /* Create an expression of "amount <= cst_ul_pagesize".  */
    4166              :   tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
    4167              :                                          amount, cst_ul_pagesize);
    4168              :   chain_cond_expr (cond_expr, pagesize_cond_expr);
    4169              : }
    4170              : 
    4171              : /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
    4172              :    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
    4173              :    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    4174              :    and this new condition are true.  Treat a null *COND_EXPR as "true".  */
    4175              : 
    4176              : static void
    4177         3315 : vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
    4178              : {
    4179         3315 :   const vec<vec_object_pair> &pairs
    4180              :     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    4181         3315 :   unsigned int i;
    4182         3315 :   vec_object_pair *pair;
    4183         3327 :   FOR_EACH_VEC_ELT (pairs, i, pair)
    4184              :     {
    4185           12 :       tree addr1 = build_fold_addr_expr (pair->first);
    4186           12 :       tree addr2 = build_fold_addr_expr (pair->second);
    4187           12 :       tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
    4188              :                                          addr1, addr2);
    4189           12 :       chain_cond_expr (cond_expr, part_cond_expr);
    4190              :     }
    4191         3315 : }
    4192              : 
    4193              : /* Create an expression that is true when all lower-bound conditions for
    4194              :    the vectorized loop are met.  Chain this condition with *COND_EXPR.  */
    4195              : 
    4196              : static void
    4197         3315 : vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
    4198              : {
    4199         3315 :   const vec<vec_lower_bound> &lower_bounds
    4200              :     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    4201         3609 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    4202              :     {
    4203          294 :       tree expr = lower_bounds[i].expr;
    4204          294 :       tree type = unsigned_type_for (TREE_TYPE (expr));
    4205          294 :       expr = fold_convert (type, expr);
    4206          294 :       poly_uint64 bound = lower_bounds[i].min_value;
    4207          294 :       if (!lower_bounds[i].unsigned_p)
    4208              :         {
    4209          125 :           expr = fold_build2 (PLUS_EXPR, type, expr,
    4210              :                               build_int_cstu (type, bound - 1));
    4211          125 :           bound += bound - 1;
    4212              :         }
    4213          294 :       tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
    4214              :                                          build_int_cstu (type, bound));
    4215          294 :       chain_cond_expr (cond_expr, part_cond_expr);
    4216              :     }
    4217         3315 : }
    4218              : 
    4219              : /* Function vect_create_cond_for_alias_checks.
    4220              : 
    4221              :    Create a conditional expression that represents the run-time checks for
    4222              :    overlapping of address ranges represented by a list of data references
    4223              :    relations passed as input.
    4224              : 
    4225              :    Input:
    4226              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4227              :                 with logical AND operation.  If it is NULL, then the function
    4228              :                 is used to return the number of alias checks.
    4229              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    4230              :                 to be checked.
    4231              : 
    4232              :    Output:
    4233              :    COND_EXPR - conditional expression.
    4234              : 
    4235              :    The returned COND_EXPR is the conditional expression to be used in the if
    4236              :    statement that controls which version of the loop gets executed at runtime.
    4237              : */
    4238              : 
    4239              : void
    4240         3315 : vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
    4241              : {
    4242         3315 :   const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
    4243              :     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    4244              : 
    4245         3315 :   if (comp_alias_ddrs.is_empty ())
    4246              :     return;
    4247              : 
    4248         3168 :   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
    4249              :                                &comp_alias_ddrs, cond_expr);
    4250         3168 :   if (dump_enabled_p ())
    4251         1290 :     dump_printf_loc (MSG_NOTE, vect_location,
    4252              :                      "created %u versioning for alias checks.\n",
    4253              :                      comp_alias_ddrs.length ());
    4254              : }
    4255              : 
    4256              : 
    4257              : /* Function vect_loop_versioning.
    4258              : 
    4259              :    If the loop has data references that may or may not be aligned or/and
    4260              :    has data reference relations whose independence was not proven then
    4261              :    two versions of the loop need to be generated, one which is vectorized
    4262              :    and one which isn't.  A test is then generated to control which of the
    4263              :    loops is executed.  The test checks for the alignment of all of the
    4264              :    data references that may or may not be aligned.  An additional
    4265              :    sequence of runtime tests is generated for each pairs of DDRs whose
    4266              :    independence was not proven.  The vectorized version of loop is
    4267              :    executed only if both alias and alignment tests are passed.
    4268              : 
    4269              :    The test generated to check which version of loop is executed
    4270              :    is modified to also check for profitability as indicated by the
    4271              :    cost model threshold TH.
    4272              : 
    4273              :    The versioning precondition(s) are placed in *COND_EXPR and
    4274              :    *COND_EXPR_STMT_LIST.  */
    4275              : 
    4276              : class loop *
    4277         3783 : vect_loop_versioning (loop_vec_info loop_vinfo,
    4278              :                       gimple *loop_vectorized_call)
    4279              : {
    4280         3783 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
    4281         3783 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    4282         3783 :   basic_block condition_bb;
    4283         3783 :   gphi_iterator gsi;
    4284         3783 :   gimple_stmt_iterator cond_exp_gsi;
    4285         3783 :   basic_block merge_bb;
    4286         3783 :   basic_block new_exit_bb;
    4287         3783 :   edge new_exit_e, e;
    4288         3783 :   gphi *orig_phi, *new_phi;
    4289         3783 :   tree cond_expr = NULL_TREE;
    4290         3783 :   gimple_seq cond_expr_stmt_list = NULL;
    4291         3783 :   tree arg;
    4292         3783 :   profile_probability prob = profile_probability::likely ();
    4293         3783 :   gimple_seq gimplify_stmt_list = NULL;
    4294         3783 :   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
    4295         3783 :   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
    4296         3783 :   bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
    4297         3783 :   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
    4298         3783 :   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
    4299         3783 :   poly_uint64 versioning_threshold
    4300              :     = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
    4301         3783 :   tree version_simd_if_cond
    4302              :     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
    4303         3783 :   unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
    4304         3783 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    4305              : 
    4306         3783 :   if (!uncounted_p && vect_apply_runtime_profitability_check_p (loop_vinfo)
    4307              :       && !ordered_p (th, versioning_threshold))
    4308              :     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4309              :                              build_int_cst (TREE_TYPE (scalar_loop_iters),
    4310              :                                             th - 1));
    4311         3783 :   if (!uncounted_p && maybe_ne (versioning_threshold, 0U))
    4312              :     {
    4313         3771 :       tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4314              :                                build_int_cst (TREE_TYPE (scalar_loop_iters),
    4315              :                                               versioning_threshold - 1));
    4316         3771 :       if (cond_expr)
    4317            0 :         cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
    4318              :                                  expr, cond_expr);
    4319              :       else
    4320         3771 :         cond_expr = expr;
    4321              :     }
    4322              : 
    4323         3783 :   tree cost_name = NULL_TREE;
    4324         3783 :   profile_probability prob2 = profile_probability::always ();
    4325         3783 :   if (cond_expr
    4326         3771 :       && EXPR_P (cond_expr)
    4327         2594 :       && (version_niter
    4328         2594 :           || version_align
    4329              :           || version_alias
    4330         2248 :           || version_simd_if_cond))
    4331              :     {
    4332         2594 :       cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4333              :                                                       &cond_expr_stmt_list,
    4334              :                                                       is_gimple_val, NULL_TREE);
    4335              :       /* Split prob () into two so that the overall probability of passing
    4336              :          both the cost-model and versioning checks is the orig prob.  */
    4337         2594 :       prob2 = prob = prob.sqrt ();
    4338              :     }
    4339              : 
    4340         3783 :   if (version_niter)
    4341          388 :     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
    4342              : 
    4343         3783 :   if (cond_expr)
    4344              :     {
    4345         3771 :       gimple_seq tem = NULL;
    4346         3771 :       cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4347              :                                           &tem, is_gimple_condexpr_for_cond,
    4348              :                                           NULL_TREE);
    4349         3771 :       gimple_seq_add_seq (&cond_expr_stmt_list, tem);
    4350              :     }
    4351              : 
    4352         3783 :   if (version_align)
    4353           43 :     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
    4354              :                                        &cond_expr_stmt_list);
    4355              : 
    4356         3783 :   if (version_spec_read)
    4357            0 :     vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
    4358              : 
    4359         3783 :   if (version_alias)
    4360              :     {
    4361         3315 :       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
    4362         3315 :       vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
    4363         3315 :       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    4364              :     }
    4365              : 
    4366         3783 :   if (version_simd_if_cond)
    4367              :     {
    4368           58 :       gcc_assert (dom_info_available_p (CDI_DOMINATORS));
    4369           58 :       if (flag_checking)
    4370           58 :         if (basic_block bb
    4371           58 :             = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
    4372           58 :           gcc_assert (bb != loop->header
    4373              :                       && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
    4374              :                       && (scalar_loop == NULL
    4375              :                           || (bb != scalar_loop->header
    4376              :                               && dominated_by_p (CDI_DOMINATORS,
    4377              :                                                  scalar_loop->header, bb))));
    4378           58 :       tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
    4379           58 :       tree c = fold_build2 (NE_EXPR, boolean_type_node,
    4380              :                             version_simd_if_cond, zero);
    4381           58 :       if (cond_expr)
    4382           58 :         cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4383              :                                  c, cond_expr);
    4384              :       else
    4385            0 :         cond_expr = c;
    4386           58 :       if (dump_enabled_p ())
    4387            5 :         dump_printf_loc (MSG_NOTE, vect_location,
    4388              :                          "created versioning for simd if condition check.\n");
    4389              :     }
    4390              : 
    4391         3783 :   cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4392              :                                       &gimplify_stmt_list,
    4393              :                                       is_gimple_condexpr_for_cond, NULL_TREE);
    4394         3783 :   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
    4395              : 
    4396              :   /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
    4397              :      invariant in.  */
    4398         3783 :   class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
    4399         3783 :   for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
    4400        70638 :        !gsi_end_p (gsi); gsi_next (&gsi))
    4401              :     {
    4402        66855 :       gimple *stmt = gsi_stmt (gsi);
    4403        66855 :       update_stmt (stmt);
    4404        66855 :       ssa_op_iter iter;
    4405        66855 :       use_operand_p use_p;
    4406        66855 :       basic_block def_bb;
    4407       153967 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    4408        87112 :         if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
    4409        87112 :             && flow_bb_inside_loop_p (outermost, def_bb))
    4410         2546 :           outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
    4411              :     }
    4412              : 
    4413              :   /* Search for the outermost loop we can version.  Avoid versioning of
    4414              :      non-perfect nests but allow if-conversion versioned loops inside.  */
    4415         3783 :   class loop *loop_to_version = loop;
    4416         3783 :   if (flow_loop_nested_p (outermost, loop))
    4417              :     {
    4418         1475 :       if (dump_enabled_p ())
    4419          674 :         dump_printf_loc (MSG_NOTE, vect_location,
    4420              :                          "trying to apply versioning to outer loop %d\n",
    4421              :                          outermost->num);
    4422         1475 :       if (outermost->num == 0)
    4423         1369 :         outermost = superloop_at_depth (loop, 1);
    4424              :       /* And avoid applying versioning on non-perfect nests.  */
    4425              :       while (loop_to_version != outermost
    4426          132 :              && (e = single_exit (loop_outer (loop_to_version)))
    4427          111 :              && !(e->flags & EDGE_COMPLEX)
    4428          110 :              && (!loop_outer (loop_to_version)->inner->next
    4429           63 :                  || vect_loop_vectorized_call (loop_to_version))
    4430         1579 :              && (!loop_outer (loop_to_version)->inner->next
    4431            5 :                  || !loop_outer (loop_to_version)->inner->next->next))
    4432              :         loop_to_version = loop_outer (loop_to_version);
    4433              :     }
    4434              : 
    4435              :   /* Apply versioning.  If there is already a scalar version created by
    4436              :      if-conversion re-use that.  Note we cannot re-use the copy of
    4437              :      an if-converted outer-loop when vectorizing the inner loop only.  */
    4438         3783 :   gcond *cond;
    4439         3783 :   if ((!loop_to_version->inner || loop == loop_to_version)
    4440         3743 :       && loop_vectorized_call)
    4441              :     {
    4442           91 :       gcc_assert (scalar_loop);
    4443           91 :       condition_bb = gimple_bb (loop_vectorized_call);
    4444          182 :       cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
    4445           91 :       gimple_cond_set_condition_from_tree (cond, cond_expr);
    4446           91 :       update_stmt (cond);
    4447              : 
    4448           91 :       if (cond_expr_stmt_list)
    4449              :         {
    4450           91 :           cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
    4451           91 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4452              :                                  GSI_SAME_STMT);
    4453              :         }
    4454              : 
    4455              :       /* if-conversion uses profile_probability::always () for both paths,
    4456              :          reset the paths probabilities appropriately.  */
    4457           91 :       edge te, fe;
    4458           91 :       extract_true_false_edges_from_block (condition_bb, &te, &fe);
    4459           91 :       te->probability = prob;
    4460           91 :       fe->probability = prob.invert ();
    4461              :       /* We can scale loops counts immediately but have to postpone
    4462              :          scaling the scalar loop because we re-use it during peeling.
    4463              : 
    4464              :          Ifcvt duplicates loop preheader, loop body and produces an basic
    4465              :          block after loop exit.  We need to scale all that.  */
    4466           91 :       basic_block preheader = loop_preheader_edge (loop_to_version)->src;
    4467           91 :       preheader->count = preheader->count.apply_probability (prob * prob2);
    4468           91 :       scale_loop_frequencies (loop_to_version, prob * prob2);
    4469              :       /* When the loop has multiple exits then we can only version itself.
    4470              :         This is denoted by loop_to_version == loop.  In this case we can
    4471              :         do the versioning by selecting the exit edge the vectorizer is
    4472              :         currently using.  */
    4473           91 :       edge exit_edge;
    4474           91 :       if (loop_to_version == loop)
    4475           91 :        exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4476              :       else
    4477            0 :        exit_edge = single_exit (loop_to_version);
    4478           91 :       exit_edge->dest->count = preheader->count;
    4479           91 :       LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
    4480              : 
    4481           91 :       nloop = scalar_loop;
    4482           91 :       if (dump_enabled_p ())
    4483           96 :         dump_printf_loc (MSG_NOTE, vect_location,
    4484              :                          "reusing %sloop version created by if conversion\n",
    4485              :                          loop_to_version != loop ? "outer " : "");
    4486           91 :     }
    4487              :   else
    4488              :     {
    4489         3692 :       if (loop_to_version != loop
    4490         3692 :           && dump_enabled_p ())
    4491           14 :         dump_printf_loc (MSG_NOTE, vect_location,
    4492              :                          "applying loop versioning to outer loop %d\n",
    4493              :                          loop_to_version->num);
    4494              : 
    4495         3692 :       unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
    4496              : 
    4497         3692 :       initialize_original_copy_tables ();
    4498         7384 :       nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
    4499         3692 :                             prob * prob2, (prob * prob2).invert (),
    4500         3692 :                             prob * prob2, (prob * prob2).invert (),
    4501              :                             true);
    4502              : 
    4503              :       /* If the PHI nodes in the loop header were reallocated, we need to fix up
    4504              :          our internally stashed copies of those.  */
    4505         3692 :       if (loop_to_version == loop)
    4506         3652 :         for (auto gsi = gsi_start_phis (loop->header);
    4507        14528 :              !gsi_end_p (gsi); gsi_next (&gsi))
    4508        10876 :           loop_vinfo->resync_stmt_addr (gsi.phi ());
    4509              : 
    4510              :       /* We will later insert second conditional so overall outcome of
    4511              :          both is prob * prob2.  */
    4512         3692 :       edge true_e, false_e;
    4513         3692 :       extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
    4514         3692 :       true_e->probability = prob;
    4515         3692 :       false_e->probability = prob.invert ();
    4516         3692 :       gcc_assert (nloop);
    4517         3692 :       nloop = get_loop_copy (loop);
    4518              : 
    4519              :       /* Assign hierarchical discriminators to distinguish loop versions.
    4520              :          Only assign to the scalar version here; the vectorized version will
    4521              :          get discriminators later during transformation/peeling.
    4522              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    4523         3692 :       gimple *nloop_last = last_nondebug_stmt (nloop->header);
    4524         3692 :       location_t nloop_loc
    4525         3692 :         = nloop_last ? gimple_location (nloop_last) : UNKNOWN_LOCATION;
    4526         3692 :       if (nloop_loc != UNKNOWN_LOCATION)
    4527              :         {
    4528         3231 :           unsigned int nloop_copyid = allocate_copyid_base (nloop_loc, 1);
    4529         3231 :           assign_discriminators_to_loop (nloop, 0, nloop_copyid);
    4530              :         }
    4531              :       /* For cycle vectorization with SLP we rely on the PHI arguments
    4532              :          appearing in the same order as the SLP node operands which for the
    4533              :          loop PHI nodes means the preheader edge dest index needs to remain
    4534              :          the same for the analyzed loop which also becomes the vectorized one.
    4535              :          Make it so in case the state after versioning differs by redirecting
    4536              :          the first edge into the header to the same destination which moves
    4537              :          it last.  */
    4538         3692 :       if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
    4539              :         {
    4540          288 :           edge e = EDGE_PRED (loop->header, 0);
    4541          288 :           ssa_redirect_edge (e, e->dest);
    4542          288 :           flush_pending_stmts (e);
    4543              :         }
    4544         3692 :       gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
    4545              : 
    4546              :       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
    4547              :          reap those otherwise;  they also refer to the original
    4548              :          loops.  */
    4549              :       class loop *l = loop;
    4550         3697 :       while (gimple *call = vect_loop_vectorized_call (l))
    4551              :         {
    4552            5 :           call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
    4553            5 :           fold_loop_internal_call (call, boolean_false_node);
    4554            5 :           l = loop_outer (l);
    4555            5 :         }
    4556         3692 :       free_original_copy_tables ();
    4557              : 
    4558         3692 :       if (cond_expr_stmt_list)
    4559              :         {
    4560         3611 :           cond_exp_gsi = gsi_last_bb (condition_bb);
    4561         3611 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4562              :                                  GSI_SAME_STMT);
    4563              :         }
    4564              : 
    4565              :       /* Loop versioning violates an assumption we try to maintain during
    4566              :          vectorization - that the loop exit block has a single predecessor.
    4567              :          After versioning, the exit block of both loop versions is the same
    4568              :          basic block (i.e. it has two predecessors). Just in order to simplify
    4569              :          following transformations in the vectorizer, we fix this situation
    4570              :          here by adding a new (empty) block on the exit-edge of the loop,
    4571              :          with the proper loop-exit phis to maintain loop-closed-form.
    4572              :          If loop versioning wasn't done from loop, but scalar_loop instead,
    4573              :          merge_bb will have already just a single successor.  */
    4574              : 
    4575              :       /* When the loop has multiple exits then we can only version itself.
    4576              :          This is denoted by loop_to_version == loop.  In this case we can
    4577              :          do the versioning by selecting the exit edge the vectorizer is
    4578              :          currently using.  */
    4579         3692 :       edge exit_edge;
    4580         3692 :       if (loop_to_version == loop)
    4581         3652 :         exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4582              :       else
    4583           40 :         exit_edge = single_exit (loop_to_version);
    4584              : 
    4585         3692 :       gcc_assert (exit_edge);
    4586         3692 :       merge_bb = exit_edge->dest;
    4587         3692 :       if (EDGE_COUNT (merge_bb->preds) >= 2)
    4588              :         {
    4589         3692 :           gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
    4590         3692 :           new_exit_bb = split_edge (exit_edge);
    4591         3692 :           new_exit_e = exit_edge;
    4592         3692 :           e = EDGE_SUCC (new_exit_bb, 0);
    4593              : 
    4594         7633 :           for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
    4595         3941 :                gsi_next (&gsi))
    4596              :             {
    4597         3941 :               tree new_res;
    4598         3941 :               orig_phi = gsi.phi ();
    4599         3941 :               new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    4600         3941 :               new_phi = create_phi_node (new_res, new_exit_bb);
    4601         3941 :               arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
    4602         3941 :               add_phi_arg (new_phi, arg, new_exit_e,
    4603              :                            gimple_phi_arg_location_from_edge (orig_phi, e));
    4604         3941 :               adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    4605              :             }
    4606              :         }
    4607              : 
    4608         3692 :       update_ssa (TODO_update_ssa_no_phi);
    4609              :     }
    4610              : 
    4611              :   /* Split the cost model check off to a separate BB.  Costing assumes
    4612              :      this is the only thing we perform when we enter the scalar loop
    4613              :      from a failed cost decision.  */
    4614         3783 :   if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
    4615              :     {
    4616         2594 :       gimple *def = SSA_NAME_DEF_STMT (cost_name);
    4617         2594 :       gcc_assert (gimple_bb (def) == condition_bb);
    4618              :       /* All uses of the cost check are 'true' after the check we
    4619              :          are going to insert.  */
    4620         2594 :       replace_uses_by (cost_name, boolean_true_node);
    4621              :       /* And we're going to build the new single use of it.  */
    4622         2594 :       gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
    4623              :                                        NULL_TREE, NULL_TREE);
    4624         2594 :       edge e = split_block (gimple_bb (def), def);
    4625         2594 :       gimple_stmt_iterator gsi = gsi_for_stmt (def);
    4626         2594 :       gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
    4627         2594 :       edge true_e, false_e;
    4628         2594 :       extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
    4629         2594 :       e->flags &= ~EDGE_FALLTHRU;
    4630         2594 :       e->flags |= EDGE_TRUE_VALUE;
    4631         2594 :       edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
    4632         2594 :       e->probability = prob2;
    4633         2594 :       e2->probability = prob2.invert ();
    4634         2594 :       e->dest->count = e->count ();
    4635         2594 :       set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
    4636         2594 :       auto_vec<basic_block, 3> adj;
    4637         2594 :       for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
    4638         7673 :            son;
    4639         5079 :            son = next_dom_son (CDI_DOMINATORS, son))
    4640         7564 :         if (EDGE_COUNT (son->preds) > 1)
    4641         2485 :           adj.safe_push (son);
    4642        10267 :       for (auto son : adj)
    4643         2485 :         set_immediate_dominator (CDI_DOMINATORS, son, e->src);
    4644              :       //debug_bb (condition_bb);
    4645              :       //debug_bb (e->src);
    4646         2594 :     }
    4647              : 
    4648         3783 :   if (version_niter)
    4649              :     {
    4650              :       /* The versioned loop could be infinite, we need to clear existing
    4651              :          niter information which is copied from the original loop.  */
    4652          388 :       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
    4653          388 :       vect_free_loop_info_assumptions (nloop);
    4654              :     }
    4655              : 
    4656         3783 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    4657         3783 :       && dump_enabled_p ())
    4658              :     {
    4659          816 :       if (version_alias)
    4660          747 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4661              :                          vect_location,
    4662              :                          "loop versioned for vectorization because of "
    4663              :                          "possible aliasing\n");
    4664          816 :       if (version_align)
    4665           25 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4666              :                          vect_location,
    4667              :                          "loop versioned for vectorization to enhance "
    4668              :                          "alignment\n");
    4669              : 
    4670              :     }
    4671              : 
    4672         3783 :   return nloop;
    4673              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.