LCOV - code coverage report
Current view: top level - gcc - tree-vect-loop-manip.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 81.4 % 1977 1610
Test Date: 2026-06-20 15:32:29 Functions: 85.1 % 47 40
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer Specific Loop Manipulations
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              :    and Ira Rosen <irar@il.ibm.com>
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "cfghooks.h"
      29              : #include "tree-pass.h"
      30              : #include "ssa.h"
      31              : #include "fold-const.h"
      32              : #include "cfganal.h"
      33              : #include "gimplify.h"
      34              : #include "gimple-iterator.h"
      35              : #include "gimplify-me.h"
      36              : #include "tree-cfg.h"
      37              : #include "tree-ssa-loop-manip.h"
      38              : #include "tree-into-ssa.h"
      39              : #include "tree-ssa.h"
      40              : #include "cfgloop.h"
      41              : #include "tree-scalar-evolution.h"
      42              : #include "tree-vectorizer.h"
      43              : #include "tree-ssa-loop-ivopts.h"
      44              : #include "gimple-fold.h"
      45              : #include "tree-ssa-loop-niter.h"
      46              : #include "internal-fn.h"
      47              : #include "stor-layout.h"
      48              : #include "optabs-query.h"
      49              : #include "vec-perm-indices.h"
      50              : #include "insn-config.h"
      51              : #include "rtl.h"
      52              : #include "recog.h"
      53              : #include "langhooks.h"
      54              : #include "tree-vector-builder.h"
      55              : #include "optabs-tree.h"
      56              : #include "hierarchical_discriminator.h"
      57              : 
      58              : 
      59              : /*************************************************************************
      60              :   Simple Loop Peeling Utilities
      61              : 
      62              :   Utilities to support loop peeling for vectorization purposes.
      63              :  *************************************************************************/
      64              : 
      65              : 
      66              : /* Renames the use *OP_P.  */
      67              : 
      68              : static void
      69       902457 : rename_use_op (use_operand_p op_p)
      70              : {
      71       902457 :   tree new_name;
      72              : 
      73       902457 :   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
      74              :     return;
      75              : 
      76       899024 :   new_name = get_current_def (USE_FROM_PTR (op_p));
      77              : 
      78              :   /* Something defined outside of the loop.  */
      79       899024 :   if (!new_name)
      80              :     return;
      81              : 
      82              :   /* An ordinary ssa name defined in the loop.  */
      83              : 
      84       775913 :   SET_USE (op_p, new_name);
      85              : }
      86              : 
      87              : 
      88              : /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
      89              :    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
      90              :    true.  */
      91              : 
      92              : static void
      93       110652 : rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
      94              : {
      95       110652 :   gimple *stmt;
      96       110652 :   use_operand_p use_p;
      97       110652 :   ssa_op_iter iter;
      98       110652 :   edge e;
      99       110652 :   edge_iterator ei;
     100       110652 :   class loop *loop = bb->loop_father;
     101       110652 :   class loop *outer_loop = NULL;
     102              : 
     103       110652 :   if (rename_from_outer_loop)
     104              :     {
     105          935 :       gcc_assert (loop);
     106          935 :       outer_loop = loop_outer (loop);
     107              :     }
     108              : 
     109       734559 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
     110       513255 :        gsi_next (&gsi))
     111              :     {
     112       513255 :       stmt = gsi_stmt (gsi);
     113      1263361 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
     114       750106 :         rename_use_op (use_p);
     115              :     }
     116              : 
     117       226395 :   FOR_EACH_EDGE (e, ei, bb->preds)
     118              :     {
     119       115743 :       if (!flow_bb_inside_loop_p (loop, e->src))
     120              :         {
     121        34621 :           if (!rename_from_outer_loop)
     122        34335 :             continue;
     123          286 :           if (e->src != outer_loop->header)
     124              :             {
     125          177 :               if (outer_loop->inner->next)
     126              :                 {
     127              :                   /* If outer_loop has 2 inner loops, allow there to
     128              :                      be an extra basic block which decides which of the
     129              :                      two loops to use using LOOP_VECTORIZED.  */
     130          174 :                   if (!single_pred_p (e->src)
     131           42 :                       || single_pred (e->src) != outer_loop->header)
     132          132 :                     continue;
     133              :                 }
     134              :             }
     135              :         }
     136       185292 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
     137       104016 :            gsi_next (&gsi))
     138       104016 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
     139              :     }
     140       110652 : }
     141              : 
     142              : 
     143              : struct adjust_info
     144              : {
     145              :   tree from, to;
     146              :   basic_block bb;
     147              : };
     148              : 
     149              : /* A stack of values to be adjusted in debug stmts.  We have to
     150              :    process them LIFO, so that the closest substitution applies.  If we
     151              :    processed them FIFO, without the stack, we might substitute uses
     152              :    with a PHI DEF that would soon become non-dominant, and when we got
     153              :    to the suitable one, it wouldn't have anything to substitute any
     154              :    more.  */
     155              : static vec<adjust_info, va_heap> adjust_vec;
     156              : 
     157              : /* Adjust any debug stmts that referenced AI->from values to use the
     158              :    loop-closed AI->to, if the references are dominated by AI->bb and
     159              :    not by the definition of AI->from.  */
     160              : 
     161              : static void
     162        78675 : adjust_debug_stmts_now (adjust_info *ai)
     163              : {
     164        78675 :   basic_block bbphi = ai->bb;
     165        78675 :   tree orig_def = ai->from;
     166        78675 :   tree new_def = ai->to;
     167        78675 :   imm_use_iterator imm_iter;
     168        78675 :   gimple *stmt;
     169        78675 :   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
     170              : 
     171        78675 :   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
     172              : 
     173              :   /* Adjust any debug stmts that held onto non-loop-closed
     174              :      references.  */
     175       391235 :   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
     176              :     {
     177       233885 :       use_operand_p use_p;
     178       233885 :       basic_block bbuse;
     179              : 
     180       233885 :       if (!is_gimple_debug (stmt))
     181       176532 :         continue;
     182              : 
     183        57353 :       gcc_assert (gimple_debug_bind_p (stmt));
     184              : 
     185        57353 :       bbuse = gimple_bb (stmt);
     186              : 
     187        57353 :       if ((bbuse == bbphi
     188        57353 :            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
     189        59061 :           && !(bbuse == bbdef
     190          854 :                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
     191              :         {
     192            0 :           if (new_def)
     193            0 :             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
     194            0 :               SET_USE (use_p, new_def);
     195              :           else
     196              :             {
     197            0 :               gimple_debug_bind_reset_value (stmt);
     198            0 :               update_stmt (stmt);
     199              :             }
     200              :         }
     201        78675 :     }
     202        78675 : }
     203              : 
     204              : /* Adjust debug stmts as scheduled before.  */
     205              : 
     206              : static void
     207        33302 : adjust_vec_debug_stmts (void)
     208              : {
     209        33302 :   if (!MAY_HAVE_DEBUG_BIND_STMTS)
     210              :     return;
     211              : 
     212        12487 :   gcc_assert (adjust_vec.exists ());
     213              : 
     214        91054 :   while (!adjust_vec.is_empty ())
     215              :     {
     216        78567 :       adjust_debug_stmts_now (&adjust_vec.last ());
     217        78567 :       adjust_vec.pop ();
     218              :     }
     219              : }
     220              : 
     221              : /* Adjust any debug stmts that referenced FROM values to use the
     222              :    loop-closed TO, if the references are dominated by BB and not by
     223              :    the definition of FROM.  If adjust_vec is non-NULL, adjustments
     224              :    will be postponed until adjust_vec_debug_stmts is called.  */
     225              : 
     226              : static void
     227       106943 : adjust_debug_stmts (tree from, tree to, basic_block bb)
     228              : {
     229       106943 :   adjust_info ai;
     230              : 
     231       106943 :   if (MAY_HAVE_DEBUG_BIND_STMTS
     232       106943 :       && TREE_CODE (from) == SSA_NAME
     233        97414 :       && ! SSA_NAME_IS_DEFAULT_DEF (from)
     234       202999 :       && ! virtual_operand_p (from))
     235              :     {
     236        78675 :       ai.from = from;
     237        78675 :       ai.to = to;
     238        78675 :       ai.bb = bb;
     239              : 
     240        78675 :       if (adjust_vec.exists ())
     241        78567 :         adjust_vec.safe_push (ai);
     242              :       else
     243          108 :         adjust_debug_stmts_now (&ai);
     244              :     }
     245       106943 : }
     246              : 
     247              : /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
     248              :    to adjust any debug stmts that referenced the old phi arg,
     249              :    presumably non-loop-closed references left over from other
     250              :    transformations.  */
     251              : 
     252              : static void
     253       215501 : adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
     254              : {
     255       215501 :   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
     256              : 
     257       215501 :   gcc_assert (TREE_CODE (orig_def) != SSA_NAME
     258              :               || orig_def != new_def);
     259              : 
     260       215501 :   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
     261              : 
     262       215501 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
     263        83844 :     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
     264              :                         gimple_bb (update_phi));
     265       215501 : }
     266              : 
     267              : /* Define one loop rgroup control CTRL from loop LOOP.  INIT_CTRL is the value
     268              :    that the control should have during the first iteration and NEXT_CTRL is the
     269              :    value that it should have on subsequent iterations.  */
     270              : 
     271              : static void
     272           86 : vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
     273              :                        tree next_ctrl)
     274              : {
     275           86 :   gphi *phi = create_phi_node (ctrl, loop->header);
     276           86 :   add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
     277           86 :   add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
     278           86 : }
     279              : 
     280              : /* Add SEQ to the end of LOOP's preheader block.  */
     281              : 
     282              : static void
     283           18 : add_preheader_seq (class loop *loop, gimple_seq seq)
     284              : {
     285           18 :   if (seq)
     286              :     {
     287           15 :       edge pe = loop_preheader_edge (loop);
     288           15 :       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     289           15 :       gcc_assert (!new_bb);
     290              :     }
     291           18 : }
     292              : 
     293              : /* Add SEQ to the beginning of LOOP's header block.  */
     294              : 
     295              : static void
     296            0 : add_header_seq (class loop *loop, gimple_seq seq)
     297              : {
     298            0 :   if (seq)
     299              :     {
     300            0 :       gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
     301            0 :       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
     302              :     }
     303            0 : }
     304              : 
     305              : /* Return true if the target can interleave elements of two vectors.
     306              :    OFFSET is 0 if the first half of the vectors should be interleaved
     307              :    or 1 if the second half should.  When returning true, store the
     308              :    associated permutation in INDICES.  */
     309              : 
     310              : static bool
     311            0 : interleave_supported_p (vec_perm_indices *indices, tree vectype,
     312              :                         unsigned int offset)
     313              : {
     314            0 :   poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
     315            0 :   poly_uint64 base = exact_div (nelts, 2) * offset;
     316            0 :   vec_perm_builder sel (nelts, 2, 3);
     317            0 :   for (unsigned int i = 0; i < 3; ++i)
     318              :     {
     319            0 :       sel.quick_push (base + i);
     320            0 :       sel.quick_push (base + i + nelts);
     321              :     }
     322            0 :   indices->new_vector (sel, 2, nelts);
     323            0 :   return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
     324            0 :                                *indices);
     325            0 : }
     326              : 
     327              : /* Try to use permutes to define the masks in DEST_RGM using the masks
     328              :    in SRC_RGM, given that the former has twice as many masks as the
     329              :    latter.  Return true on success, adding any new statements to SEQ.  */
     330              : 
     331              : static bool
     332            0 : vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
     333              :                                rgroup_controls *src_rgm)
     334              : {
     335            0 :   tree src_masktype = src_rgm->type;
     336            0 :   tree dest_masktype = dest_rgm->type;
     337            0 :   machine_mode src_mode = TYPE_MODE (src_masktype);
     338            0 :   insn_code icode1, icode2;
     339            0 :   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
     340            0 :       && (icode1 = optab_handler (vec_unpacku_hi_optab,
     341              :                                   src_mode)) != CODE_FOR_nothing
     342            0 :       && (icode2 = optab_handler (vec_unpacku_lo_optab,
     343              :                                   src_mode)) != CODE_FOR_nothing)
     344              :     {
     345              :       /* Unpacking the source masks gives at least as many mask bits as
     346              :          we need.  We can then VIEW_CONVERT any excess bits away.  */
     347            0 :       machine_mode dest_mode = insn_data[icode1].operand[0].mode;
     348            0 :       gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
     349            0 :       tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
     350            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     351              :         {
     352            0 :           tree src = src_rgm->controls[i / 2];
     353            0 :           tree dest = dest_rgm->controls[i];
     354            0 :           tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
     355            0 :                             ? VEC_UNPACK_HI_EXPR
     356              :                             : VEC_UNPACK_LO_EXPR);
     357            0 :           gassign *stmt;
     358            0 :           if (dest_masktype == unpack_masktype)
     359            0 :             stmt = gimple_build_assign (dest, code, src);
     360              :           else
     361              :             {
     362            0 :               tree temp = make_ssa_name (unpack_masktype);
     363            0 :               stmt = gimple_build_assign (temp, code, src);
     364            0 :               gimple_seq_add_stmt (seq, stmt);
     365            0 :               stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
     366              :                                           build1 (VIEW_CONVERT_EXPR,
     367              :                                                   dest_masktype, temp));
     368              :             }
     369            0 :           gimple_seq_add_stmt (seq, stmt);
     370              :         }
     371              :       return true;
     372              :     }
     373            0 :   vec_perm_indices indices[2];
     374            0 :   if (dest_masktype == src_masktype
     375            0 :       && interleave_supported_p (&indices[0], src_masktype, 0)
     376            0 :       && interleave_supported_p (&indices[1], src_masktype, 1))
     377              :     {
     378              :       /* The destination requires twice as many mask bits as the source, so
     379              :          we can use interleaving permutes to double up the number of bits.  */
     380              :       tree masks[2];
     381            0 :       for (unsigned int i = 0; i < 2; ++i)
     382            0 :         masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
     383            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     384              :         {
     385            0 :           tree src = src_rgm->controls[i / 2];
     386            0 :           tree dest = dest_rgm->controls[i];
     387            0 :           gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
     388            0 :                                               src, src, masks[i & 1]);
     389            0 :           gimple_seq_add_stmt (seq, stmt);
     390              :         }
     391            0 :       return true;
     392              :     }
     393              :   return false;
     394            0 : }
     395              : 
     396              : /* Populate DEST_RGM->controls, given that they should add up to STEP.
     397              : 
     398              :      STEP = MIN_EXPR <ivtmp_34, VF>;
     399              : 
     400              :      First length (MIN (X, VF/N)):
     401              :        loop_len_15 = MIN_EXPR <STEP, VF/N>;
     402              : 
     403              :      Second length:
     404              :        tmp = STEP - loop_len_15;
     405              :        loop_len_16 = MIN (tmp, VF/N);
     406              : 
     407              :      Third length:
     408              :        tmp2 = tmp - loop_len_16;
     409              :        loop_len_17 = MIN (tmp2, VF/N);
     410              : 
     411              :      Last length:
     412              :        loop_len_18 = tmp2 - loop_len_17;
     413              : */
     414              : 
     415              : static void
     416            0 : vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
     417              :                                rgroup_controls *dest_rgm, tree step)
     418              : {
     419            0 :   tree ctrl_type = dest_rgm->type;
     420            0 :   poly_uint64 nitems_per_ctrl
     421            0 :     = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
     422            0 :   tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
     423              : 
     424            0 :   for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     425              :     {
     426            0 :       tree ctrl = dest_rgm->controls[i];
     427            0 :       if (i == 0)
     428              :         {
     429              :           /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
     430            0 :           gassign *assign
     431            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     432            0 :           gimple_seq_add_stmt (seq, assign);
     433              :         }
     434            0 :       else if (i == dest_rgm->controls.length () - 1)
     435              :         {
     436              :           /* Last iteration: Remain capped to the range [0, VF/N].  */
     437            0 :           gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
     438            0 :                                                  dest_rgm->controls[i - 1]);
     439            0 :           gimple_seq_add_stmt (seq, assign);
     440              :         }
     441              :       else
     442              :         {
     443              :           /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N].  */
     444            0 :           step = gimple_build (seq, MINUS_EXPR, iv_type, step,
     445            0 :                                dest_rgm->controls[i - 1]);
     446            0 :           gassign *assign
     447            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     448            0 :           gimple_seq_add_stmt (seq, assign);
     449              :         }
     450              :     }
     451            0 : }
     452              : 
     453              : /* Stores the standard position for induction variable increment in belonging to
     454              :    LOOP_EXIT (just before the exit condition of the given exit to BSI.
     455              :    INSERT_AFTER is set to true if the increment should be inserted after
     456              :    *BSI.  */
     457              : 
     458              : void
     459        62277 : vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi,
     460              :                             bool *insert_after)
     461              : {
     462        62277 :   basic_block bb = loop_exit->src;
     463        62277 :   *bsi = gsi_last_bb (bb);
     464        62277 :   *insert_after = false;
     465        62277 : }
     466              : 
     467              : /* Helper for vect_set_loop_condition_partial_vectors.  Generate definitions
     468              :    for all the rgroup controls in RGC and return a control that is nonzero
     469              :    when the loop needs to iterate.  Add any new preheader statements to
     470              :    PREHEADER_SEQ.  Use LOOP_COND_GSI to insert code before the exit gcond.
     471              : 
     472              :    RGC belongs to loop LOOP.  The loop originally iterated NITERS
     473              :    times and has been vectorized according to LOOP_VINFO.
     474              : 
     475              :    If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
     476              :    starts with NITERS_SKIP dummy iterations of the scalar loop before
     477              :    the real work starts.  The mask elements for these dummy iterations
     478              :    must be 0, to ensure that the extra iterations do not have an effect.
     479              : 
     480              :    It is known that:
     481              : 
     482              :      NITERS * RGC->max_nscalars_per_iter * RGC->factor
     483              : 
     484              :    does not overflow.  However, MIGHT_WRAP_P says whether an induction
     485              :    variable that starts at 0 and has step:
     486              : 
     487              :      VF * RGC->max_nscalars_per_iter * RGC->factor
     488              : 
     489              :    might overflow before hitting a value above:
     490              : 
     491              :      (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
     492              : 
     493              :    This means that we cannot guarantee that such an induction variable
     494              :    would ever hit a value that produces a set of all-false masks or zero
     495              :    lengths for RGC.
     496              : 
     497              :    Note: the cost of the code generated by this function is modeled
     498              :    by vect_estimate_min_profitable_iters, so changes here may need
     499              :    corresponding changes there.  */
     500              : 
     501              : static tree
     502            0 : vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
     503              :                                  gimple_seq *preheader_seq,
     504              :                                  gimple_seq *header_seq,
     505              :                                  gimple_stmt_iterator loop_cond_gsi,
     506              :                                  rgroup_controls *rgc, tree niters,
     507              :                                  tree niters_skip, bool might_wrap_p,
     508              :                                  tree *iv_step, tree *compare_step)
     509              : {
     510            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
     511            0 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     512            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
     513              : 
     514            0 :   tree ctrl_type = rgc->type;
     515            0 :   unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
     516            0 :   poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
     517            0 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     518            0 :   tree length_limit = NULL_TREE;
     519              :   /* For length, we need length_limit to ensure length in range.  */
     520            0 :   if (!use_masks_p)
     521            0 :     length_limit = build_int_cst (compare_type, nitems_per_ctrl);
     522              : 
     523              :   /* Calculate the maximum number of item values that the rgroup
     524              :      handles in total, the number that it handles for each iteration
     525              :      of the vector loop, and the number that it should skip during the
     526              :      first iteration of the vector loop.  */
     527            0 :   tree nitems_total = niters;
     528            0 :   tree nitems_step = build_int_cst (iv_type, vf);
     529            0 :   tree nitems_skip = niters_skip;
     530            0 :   if (nitems_per_iter != 1)
     531              :     {
     532              :       /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
     533              :          these multiplications don't overflow.  */
     534            0 :       tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
     535            0 :       tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
     536            0 :       nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     537              :                                    nitems_total, compare_factor);
     538            0 :       nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
     539              :                                   nitems_step, iv_factor);
     540            0 :       if (nitems_skip)
     541            0 :         nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     542              :                                     nitems_skip, compare_factor);
     543              :     }
     544              : 
     545              :   /* Create an induction variable that counts the number of items
     546              :      processed.  */
     547            0 :   tree index_before_incr, index_after_incr;
     548            0 :   gimple_stmt_iterator incr_gsi;
     549            0 :   bool insert_after;
     550            0 :   edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
     551            0 :   vect_iv_increment_position (exit_e, &incr_gsi, &insert_after);
     552            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
     553              :     {
     554              :       /* Create an IV that counts down from niters_total and whose step
     555              :          is the (variable) amount processed in the current iteration:
     556              :            ...
     557              :            _10 = (unsigned long) count_12(D);
     558              :            ...
     559              :            # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
     560              :            _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
     561              :            ...
     562              :            vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
     563              :            ...
     564              :            ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
     565              :            ...
     566              :            if (ivtmp_9 > POLY_INT_CST [4, 4])
     567              :              goto <bb 4>; [83.33%]
     568              :            else
     569              :              goto <bb 5>; [16.67%]
     570              :       */
     571            0 :       nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
     572            0 :       tree step = rgc->controls.length () == 1 ? rgc->controls[0]
     573            0 :                                                : make_ssa_name (iv_type);
     574              :       /* Create decrement IV.  */
     575            0 :       if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
     576              :         {
     577            0 :           create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
     578              :                      insert_after, &index_before_incr, &index_after_incr);
     579            0 :           tree vectype = build_zero_cst (rgc->type);
     580            0 :           tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
     581              :                                    index_before_incr, nitems_step,
     582              :                                    vectype);
     583            0 :           gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
     584              :         }
     585              :       else
     586              :         {
     587            0 :           create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
     588              :                      &incr_gsi, insert_after, &index_before_incr,
     589              :                      &index_after_incr);
     590            0 :           gimple_seq_add_stmt (header_seq,
     591            0 :                                gimple_build_assign (step, MIN_EXPR,
     592              :                                                     index_before_incr,
     593              :                                                     nitems_step));
     594              :         }
     595            0 :       *iv_step = step;
     596            0 :       *compare_step = nitems_step;
     597            0 :       return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
     598            0 :                                                        : index_before_incr;
     599              :     }
     600              : 
     601              :   /* Create increment IV.  */
     602            0 :   create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
     603              :              loop, &incr_gsi, insert_after, &index_before_incr,
     604              :              &index_after_incr);
     605              : 
     606            0 :   tree zero_index = build_int_cst (compare_type, 0);
     607            0 :   tree test_index, test_limit, first_limit;
     608            0 :   gimple_stmt_iterator *test_gsi;
     609            0 :   if (might_wrap_p)
     610              :     {
     611              :       /* In principle the loop should stop iterating once the incremented
     612              :          IV reaches a value greater than or equal to:
     613              : 
     614              :            NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
     615              : 
     616              :          However, there's no guarantee that this addition doesn't overflow
     617              :          the comparison type, or that the IV hits a value above it before
     618              :          wrapping around.  We therefore adjust the limit down by one
     619              :          IV step:
     620              : 
     621              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     622              :            -[infinite-prec] NITEMS_STEP
     623              : 
     624              :          and compare the IV against this limit _before_ incrementing it.
     625              :          Since the comparison type is unsigned, we actually want the
     626              :          subtraction to saturate at zero:
     627              : 
     628              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     629              :            -[sat] NITEMS_STEP
     630              : 
     631              :          And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
     632              : 
     633              :            NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
     634              : 
     635              :          where the rightmost subtraction can be done directly in
     636              :          COMPARE_TYPE.  */
     637            0 :       test_index = index_before_incr;
     638            0 :       tree adjust = gimple_convert (preheader_seq, compare_type,
     639              :                                     nitems_step);
     640            0 :       if (nitems_skip)
     641            0 :         adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     642              :                                adjust, nitems_skip);
     643            0 :       test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
     644              :                                  nitems_total, adjust);
     645            0 :       test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     646              :                                  test_limit, adjust);
     647            0 :       test_gsi = &incr_gsi;
     648              : 
     649              :       /* Get a safe limit for the first iteration.  */
     650            0 :       if (nitems_skip)
     651              :         {
     652              :           /* The first vector iteration can handle at most NITEMS_STEP
     653              :              items.  NITEMS_STEP <= CONST_LIMIT, and adding
     654              :              NITEMS_SKIP to that cannot overflow.  */
     655            0 :           tree const_limit = build_int_cst (compare_type,
     656            0 :                                             LOOP_VINFO_VECT_FACTOR (loop_vinfo)
     657            0 :                                             * nitems_per_iter);
     658            0 :           first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
     659              :                                       nitems_total, const_limit);
     660            0 :           first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     661              :                                       first_limit, nitems_skip);
     662              :         }
     663              :       else
     664              :         /* For the first iteration it doesn't matter whether the IV hits
     665              :            a value above NITEMS_TOTAL.  That only matters for the latch
     666              :            condition.  */
     667              :         first_limit = nitems_total;
     668              :     }
     669              :   else
     670              :     {
     671              :       /* Test the incremented IV, which will always hit a value above
     672              :          the bound before wrapping.  */
     673            0 :       test_index = index_after_incr;
     674            0 :       test_limit = nitems_total;
     675            0 :       if (nitems_skip)
     676            0 :         test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     677              :                                    test_limit, nitems_skip);
     678              :       test_gsi = &loop_cond_gsi;
     679              : 
     680              :       first_limit = test_limit;
     681              :     }
     682              : 
     683              :   /* Convert the IV value to the comparison type (either a no-op or
     684              :      a demotion).  */
     685            0 :   gimple_seq test_seq = NULL;
     686            0 :   test_index = gimple_convert (&test_seq, compare_type, test_index);
     687            0 :   gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
     688              : 
     689              :   /* Provide a definition of each control in the group.  */
     690            0 :   tree next_ctrl = NULL_TREE;
     691            0 :   tree ctrl;
     692            0 :   unsigned int i;
     693            0 :   FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
     694              :     {
     695              :       /* Previous controls will cover BIAS items.  This control covers the
     696              :          next batch.  */
     697            0 :       poly_uint64 bias = nitems_per_ctrl * i;
     698            0 :       tree bias_tree = build_int_cst (compare_type, bias);
     699              : 
     700              :       /* See whether the first iteration of the vector loop is known
     701              :          to have a full control.  */
     702            0 :       poly_uint64 const_limit;
     703            0 :       bool first_iteration_full
     704            0 :         = (poly_int_tree_p (first_limit, &const_limit)
     705            0 :            && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
     706              : 
     707              :       /* Rather than have a new IV that starts at BIAS and goes up to
     708              :          TEST_LIMIT, prefer to use the same 0-based IV for each control
     709              :          and adjust the bound down by BIAS.  */
     710            0 :       tree this_test_limit = test_limit;
     711            0 :       if (i != 0)
     712              :         {
     713            0 :           this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
     714              :                                           compare_type, this_test_limit,
     715              :                                           bias_tree);
     716            0 :           this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
     717              :                                           compare_type, this_test_limit,
     718              :                                           bias_tree);
     719              :         }
     720              : 
     721              :       /* Create the initial control.  First include all items that
     722              :          are within the loop limit.  */
     723            0 :       tree init_ctrl = NULL_TREE;
     724            0 :       if (!first_iteration_full)
     725              :         {
     726            0 :           tree start, end;
     727            0 :           if (first_limit == test_limit)
     728              :             {
     729              :               /* Use a natural test between zero (the initial IV value)
     730              :                  and the loop limit.  The "else" block would be valid too,
     731              :                  but this choice can avoid the need to load BIAS_TREE into
     732              :                  a register.  */
     733              :               start = zero_index;
     734              :               end = this_test_limit;
     735              :             }
     736              :           else
     737              :             {
     738              :               /* FIRST_LIMIT is the maximum number of items handled by the
     739              :                  first iteration of the vector loop.  Test the portion
     740              :                  associated with this control.  */
     741            0 :               start = bias_tree;
     742            0 :               end = first_limit;
     743              :             }
     744              : 
     745            0 :           if (use_masks_p)
     746            0 :             init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
     747              :                                         start, end, "max_mask");
     748              :           else
     749              :             {
     750            0 :               init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
     751            0 :               gimple_seq seq = vect_gen_len (init_ctrl, start,
     752              :                                              end, length_limit);
     753            0 :               gimple_seq_add_seq (preheader_seq, seq);
     754              :             }
     755              :         }
     756              : 
     757              :       /* Now AND out the bits that are within the number of skipped
     758              :          items.  */
     759            0 :       poly_uint64 const_skip;
     760            0 :       if (nitems_skip
     761            0 :           && !(poly_int_tree_p (nitems_skip, &const_skip)
     762            0 :                && known_le (const_skip, bias)))
     763              :         {
     764            0 :           gcc_assert (use_masks_p);
     765            0 :           tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
     766              :                                                     bias_tree, nitems_skip);
     767            0 :           if (init_ctrl)
     768            0 :             init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
     769              :                                       init_ctrl, unskipped_mask);
     770              :           else
     771              :             init_ctrl = unskipped_mask;
     772              :         }
     773              : 
     774            0 :       if (!init_ctrl)
     775              :         {
     776              :           /* First iteration is full.  */
     777            0 :           if (use_masks_p)
     778            0 :             init_ctrl = build_minus_one_cst (ctrl_type);
     779              :           else
     780              :             init_ctrl = length_limit;
     781              :         }
     782              : 
     783              :       /* Get the control value for the next iteration of the loop.  */
     784            0 :       if (use_masks_p)
     785              :         {
     786            0 :           gimple_seq stmts = NULL;
     787            0 :           next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
     788              :                                       this_test_limit, "next_mask");
     789            0 :           gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
     790              :         }
     791              :       else
     792              :         {
     793            0 :           next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
     794            0 :           gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
     795              :                                          length_limit);
     796            0 :           gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
     797              :         }
     798              : 
     799            0 :       vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
     800              :     }
     801              : 
     802            0 :   int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
     803            0 :   if (partial_load_bias != 0)
     804              :     {
     805            0 :       tree adjusted_len = rgc->bias_adjusted_ctrl;
     806            0 :       gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
     807            0 :                                             rgc->controls[0],
     808              :                                             build_int_cst
     809            0 :                                             (TREE_TYPE (rgc->controls[0]),
     810            0 :                                              partial_load_bias));
     811            0 :       gimple_seq_add_stmt (header_seq, minus);
     812              :     }
     813              : 
     814              :   return next_ctrl;
     815              : }
     816              : 
     817              : /* Set up the iteration condition and rgroup controls for LOOP, given
     818              :    that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
     819              :    loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     820              :    the number of iterations of the original scalar loop that should be
     821              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     822              :    for vect_set_loop_condition.
     823              : 
     824              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     825              :    final gcond.  */
     826              : 
     827              : static gcond *
     828            0 : vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
     829              :                                          loop_vec_info loop_vinfo, tree niters,
     830              :                                          tree final_iv, bool niters_maybe_zero,
     831              :                                          gimple_stmt_iterator loop_cond_gsi)
     832              : {
     833            0 :   gimple_seq preheader_seq = NULL;
     834            0 :   gimple_seq header_seq = NULL;
     835              : 
     836            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
     837            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
     838            0 :   unsigned int compare_precision = TYPE_PRECISION (compare_type);
     839            0 :   tree orig_niters = niters;
     840              : 
     841              :   /* Type of the initial value of NITERS.  */
     842            0 :   tree ni_actual_type = TREE_TYPE (niters);
     843            0 :   unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
     844            0 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     845            0 :   if (niters_skip)
     846            0 :     niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
     847              : 
     848              :   /* Convert NITERS to the same size as the compare.  */
     849            0 :   if (compare_precision > ni_actual_precision
     850            0 :       && niters_maybe_zero)
     851              :     {
     852              :       /* We know that there is always at least one iteration, so if the
     853              :          count is zero then it must have wrapped.  Cope with this by
     854              :          subtracting 1 before the conversion and adding 1 to the result.  */
     855            0 :       gcc_assert (TYPE_UNSIGNED (ni_actual_type));
     856            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
     857              :                              niters, build_minus_one_cst (ni_actual_type));
     858            0 :       niters = gimple_convert (&preheader_seq, compare_type, niters);
     859            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
     860              :                              niters, build_one_cst (compare_type));
     861              :     }
     862              :   else
     863            0 :     niters = gimple_convert (&preheader_seq, compare_type, niters);
     864              : 
     865              :   /* Iterate over all the rgroups and fill in their controls.  We could use
     866              :      the first control from any rgroup for the loop condition; here we
     867              :      arbitrarily pick the last.  */
     868            0 :   tree test_ctrl = NULL_TREE;
     869            0 :   tree iv_step = NULL_TREE;
     870            0 :   tree compare_step = NULL_TREE;
     871            0 :   rgroup_controls *rgc;
     872            0 :   rgroup_controls *iv_rgc = nullptr;
     873            0 :   unsigned int i;
     874            0 :   auto_vec<rgroup_controls> *controls = use_masks_p
     875            0 :                                           ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
     876              :                                           : &LOOP_VINFO_LENS (loop_vinfo);
     877            0 :   FOR_EACH_VEC_ELT (*controls, i, rgc)
     878            0 :     if (!rgc->controls.is_empty ())
     879              :       {
     880              :         /* First try using permutes.  This adds a single vector
     881              :            instruction to the loop for each mask, but needs no extra
     882              :            loop invariants or IVs.  */
     883            0 :         unsigned int nmasks = i + 1;
     884            0 :         if (use_masks_p && (nmasks & 1) == 0)
     885              :           {
     886            0 :             rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
     887            0 :             if (!half_rgc->controls.is_empty ()
     888            0 :                 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
     889            0 :               continue;
     890              :           }
     891              : 
     892            0 :         if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     893            0 :             || !iv_rgc
     894            0 :             || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
     895            0 :                 != rgc->max_nscalars_per_iter * rgc->factor))
     896              :           {
     897              :             /* See whether zero-based IV would ever generate all-false masks
     898              :                or zero length before wrapping around.  */
     899            0 :             bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
     900              : 
     901              :             /* Set up all controls for this group.  */
     902            0 :             test_ctrl
     903            0 :               = vect_set_loop_controls_directly (loop, loop_vinfo,
     904              :                                                  &preheader_seq, &header_seq,
     905              :                                                  loop_cond_gsi, rgc, niters,
     906              :                                                  niters_skip, might_wrap_p,
     907              :                                                  &iv_step, &compare_step);
     908              : 
     909            0 :             iv_rgc = rgc;
     910              :           }
     911              : 
     912            0 :         if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     913            0 :             && rgc->controls.length () > 1)
     914              :           {
     915              :             /* vect_set_loop_controls_directly creates an IV whose step
     916              :                is equal to the expected sum of RGC->controls.  Use that
     917              :                information to populate RGC->controls.  */
     918            0 :             tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     919            0 :             gcc_assert (iv_step);
     920            0 :             vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
     921              :           }
     922              :       }
     923              : 
     924              :   /* Emit all accumulated statements.  */
     925            0 :   add_preheader_seq (loop, preheader_seq);
     926            0 :   add_header_seq (loop, header_seq);
     927              : 
     928              :   /* Get a boolean result that tells us whether to iterate.  */
     929            0 :   gcond *cond_stmt;
     930            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     931            0 :       && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
     932              :     {
     933            0 :       gcc_assert (compare_step);
     934            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
     935            0 :       cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
     936              :                                      NULL_TREE);
     937            0 :     }
     938              :   else
     939              :     {
     940            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
     941            0 :       tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
     942            0 :       cond_stmt
     943            0 :         = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
     944              :     }
     945            0 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
     946              : 
     947              :   /* The loop iterates (NITERS - 1) / VF + 1 times.
     948              :      Subtract one from this to get the latch count.  */
     949            0 :   tree step = build_int_cst (compare_type,
     950            0 :                              LOOP_VINFO_VECT_FACTOR (loop_vinfo));
     951            0 :   tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
     952              :                                        build_minus_one_cst (compare_type));
     953            0 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
     954              :                                      niters_minus_one, step);
     955              : 
     956            0 :   if (final_iv)
     957              :     {
     958            0 :       gassign *assign;
     959              :       /* If vectorizing an inverted early break loop we have to restart the
     960              :          scalar loop at niters - vf.  This matches what we do in
     961              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
     962            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     963              :         {
     964            0 :           tree ftype = TREE_TYPE (orig_niters);
     965            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
     966            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
     967              :         }
     968              :        else
     969            0 :         assign = gimple_build_assign (final_iv, orig_niters);
     970            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
     971              :     }
     972              : 
     973            0 :   return cond_stmt;
     974              : }
     975              : 
     976              : /* Set up the iteration condition and rgroup controls for LOOP in AVX512
     977              :    style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
     978              :    vectorized loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     979              :    the number of iterations of the original scalar loop that should be
     980              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     981              :    for vect_set_loop_condition.
     982              : 
     983              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     984              :    final gcond.  */
     985              : 
     986              : static gcond *
     987           18 : vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
     988              :                                          edge exit_edge,
     989              :                                          loop_vec_info loop_vinfo, tree niters,
     990              :                                          tree final_iv,
     991              :                                          bool niters_maybe_zero,
     992              :                                          gimple_stmt_iterator loop_cond_gsi)
     993              : {
     994           18 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     995           18 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     996           18 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     997           18 :   tree orig_niters = niters;
     998           18 :   gimple_seq preheader_seq = NULL;
     999              : 
    1000              :   /* Create an IV that counts down from niters and whose step
    1001              :      is the number of iterations processed in the current iteration.
    1002              :      Produce the controls with compares like the following.
    1003              : 
    1004              :        # iv_2 = PHI <niters, iv_3>
    1005              :        rem_4 = MIN <iv_2, VF>;
    1006              :        remv_6 = { rem_4, rem_4, rem_4, ... }
    1007              :        mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6;
    1008              :        iv_3 = iv_2 - VF;
    1009              :        if (iv_2 > VF)
    1010              :          continue;
    1011              : 
    1012              :      Where the constant is built with elements at most VF - 1 and
    1013              :      repetitions according to max_nscalars_per_iter which is guaranteed
    1014              :      to be the same within a group.  */
    1015              : 
    1016              :   /* Convert NITERS to the determined IV type.  */
    1017           18 :   if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
    1018           18 :       && niters_maybe_zero)
    1019              :     {
    1020              :       /* We know that there is always at least one iteration, so if the
    1021              :          count is zero then it must have wrapped.  Cope with this by
    1022              :          subtracting 1 before the conversion and adding 1 to the result.  */
    1023            0 :       gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
    1024            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
    1025            0 :                              niters, build_minus_one_cst (TREE_TYPE (niters)));
    1026            0 :       niters = gimple_convert (&preheader_seq, iv_type, niters);
    1027            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
    1028              :                              niters, build_one_cst (iv_type));
    1029              :     }
    1030              :   else
    1031           18 :     niters = gimple_convert (&preheader_seq, iv_type, niters);
    1032              : 
    1033              :   /* Bias the initial value of the IV in case we need to skip iterations
    1034              :      at the beginning.  */
    1035           18 :   tree niters_adj = niters;
    1036           18 :   if (niters_skip)
    1037              :     {
    1038            1 :       tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
    1039            1 :       niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
    1040              :                                  iv_type, niters, skip);
    1041              :     }
    1042              : 
    1043              :   /* The iteration step is the vectorization factor.  */
    1044           18 :   tree iv_step = build_int_cst (iv_type, vf);
    1045              : 
    1046              :   /* Create the decrement IV.  */
    1047           18 :   tree index_before_incr, index_after_incr;
    1048           18 :   gimple_stmt_iterator incr_gsi;
    1049           18 :   bool insert_after;
    1050           18 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1051           18 :   create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
    1052              :              &incr_gsi, insert_after, &index_before_incr,
    1053              :              &index_after_incr);
    1054              : 
    1055              :   /* Iterate over all the rgroups and fill in their controls.  */
    1056           80 :   for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
    1057              :     {
    1058           26 :       if (rgc.controls.is_empty ())
    1059            6 :         continue;
    1060              : 
    1061           20 :       tree ctrl_type = rgc.type;
    1062           20 :       poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
    1063              : 
    1064           20 :       tree vectype = rgc.compare_type;
    1065              : 
    1066              :       /* index_after_incr is the IV specifying the remaining iterations in
    1067              :          the next iteration.  */
    1068           20 :       tree rem = index_after_incr;
    1069              :       /* When the data type for the compare to produce the mask is
    1070              :          smaller than the IV type we need to saturate.  Saturate to
    1071              :          the smallest possible value (IV_TYPE) so we only have to
    1072              :          saturate once (CSE will catch redundant ones we add).  */
    1073           20 :       if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
    1074            9 :         rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1075              :                             UNKNOWN_LOCATION,
    1076            9 :                             MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
    1077           20 :       rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1078           20 :                             UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
    1079              : 
    1080              :       /* Build a data vector composed of the remaining iterations.  */
    1081           20 :       rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1082              :                                           UNKNOWN_LOCATION, vectype, rem);
    1083              : 
    1084              :       /* Provide a definition of each vector in the control group.  */
    1085           20 :       tree next_ctrl = NULL_TREE;
    1086           20 :       tree first_rem = NULL_TREE;
    1087           20 :       tree ctrl;
    1088           20 :       unsigned int i;
    1089          238 :       FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
    1090              :         {
    1091              :           /* Previous controls will cover BIAS items.  This control covers the
    1092              :              next batch.  */
    1093           86 :           poly_uint64 bias = nitems_per_ctrl * i;
    1094              : 
    1095              :           /* Build the constant to compare the remaining iters against,
    1096              :              this is sth like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
    1097              :              split into pieces.  */
    1098           86 :           unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
    1099           86 :           tree_vector_builder builder (vectype, n, 1);
    1100         1266 :           for (unsigned i = 0; i < n; ++i)
    1101              :             {
    1102         1180 :               unsigned HOST_WIDE_INT val
    1103         1180 :                 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
    1104         1180 :               gcc_assert (val < vf.to_constant ());
    1105         1180 :               builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
    1106              :             }
    1107           86 :           tree cmp_series = builder.build ();
    1108              : 
    1109              :           /* Create the initial control.  First include all items that
    1110              :              are within the loop limit.  */
    1111           86 :           tree init_ctrl = NULL_TREE;
    1112           86 :           poly_uint64 const_limit;
    1113              :           /* See whether the first iteration of the vector loop is known
    1114              :              to have a full control.  */
    1115           86 :           if (poly_int_tree_p (niters, &const_limit)
    1116           86 :               && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
    1117            1 :             init_ctrl = build_minus_one_cst (ctrl_type);
    1118              :           else
    1119              :             {
    1120              :               /* The remaining work items initially are niters.  Saturate,
    1121              :                  splat and compare.  */
    1122           85 :               if (!first_rem)
    1123              :                 {
    1124           19 :                   first_rem = niters;
    1125           19 :                   if (TYPE_PRECISION (TREE_TYPE (vectype))
    1126           19 :                       < TYPE_PRECISION (iv_type))
    1127            9 :                     first_rem = gimple_build (&preheader_seq,
    1128            9 :                                               MIN_EXPR, TREE_TYPE (first_rem),
    1129              :                                               first_rem, iv_step);
    1130           19 :                   first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
    1131              :                                               first_rem);
    1132           19 :                   first_rem = gimple_build_vector_from_val (&preheader_seq,
    1133              :                                                             vectype, first_rem);
    1134              :                 }
    1135           85 :               init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
    1136              :                                         cmp_series, first_rem);
    1137              :             }
    1138              : 
    1139              :           /* Now AND out the bits that are within the number of skipped
    1140              :              items.  */
    1141           86 :           poly_uint64 const_skip;
    1142           86 :           if (niters_skip
    1143           86 :               && !(poly_int_tree_p (niters_skip, &const_skip)
    1144            1 :                    && known_le (const_skip, bias)))
    1145              :             {
    1146              :               /* For integer mode masks it's cheaper to shift out the bits
    1147              :                  since that avoids loading a constant.  */
    1148            1 :               gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
    1149            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1150            1 :                                         lang_hooks.types.type_for_mode
    1151            1 :                                           (TYPE_MODE (ctrl_type), 1),
    1152              :                                         init_ctrl);
    1153              :               /* ???  But when the shift amount isn't constant this requires
    1154              :                  a round-trip to GRPs.  We could apply the bias to either
    1155              :                  side of the compare instead.  */
    1156            2 :               tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
    1157            1 :                                          TREE_TYPE (niters_skip), niters_skip,
    1158            1 :                                          build_int_cst (TREE_TYPE (niters_skip),
    1159            1 :                                                         bias));
    1160            2 :               shift = gimple_build (&preheader_seq, MULT_EXPR,
    1161            1 :                                     TREE_TYPE (niters_skip), shift,
    1162            1 :                                     build_int_cst (TREE_TYPE (niters_skip),
    1163            1 :                                                    rgc.max_nscalars_per_iter));
    1164            1 :               init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
    1165            1 :                                         TREE_TYPE (init_ctrl),
    1166              :                                         init_ctrl, shift);
    1167            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1168              :                                         ctrl_type, init_ctrl);
    1169              :             }
    1170              : 
    1171              :           /* Get the control value for the next iteration of the loop.  */
    1172           86 :           next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1173              :                                     UNKNOWN_LOCATION,
    1174              :                                     LT_EXPR, ctrl_type, cmp_series, rem);
    1175              : 
    1176           86 :           vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
    1177           86 :         }
    1178              :     }
    1179              : 
    1180              :   /* Emit all accumulated statements.  */
    1181           18 :   add_preheader_seq (loop, preheader_seq);
    1182              : 
    1183              :   /* Adjust the exit test using the decrementing IV.  */
    1184           18 :   tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
    1185              :   /* When we peel for alignment with niter_skip != 0 this can
    1186              :      cause niter + niter_skip to wrap and since we are comparing the
    1187              :      value before the decrement here we get a false early exit.
    1188              :      We can't compare the value after decrement either because that
    1189              :      decrement could wrap as well as we're not doing a saturating
    1190              :      decrement.  To avoid this situation we force a larger
    1191              :      iv_type.  */
    1192           18 :   gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
    1193              :                                         NULL_TREE, NULL_TREE);
    1194           18 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1195              : 
    1196              :   /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
    1197              :      Subtract one from this to get the latch count.  */
    1198           18 :   tree niters_minus_one
    1199           18 :     = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
    1200              :                    build_minus_one_cst (TREE_TYPE (orig_niters)));
    1201           18 :   tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
    1202           18 :   if (niters_skip)
    1203            1 :     niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
    1204              :                                fold_convert (iv_type, niters_skip));
    1205           18 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
    1206              :                                      niters_adj2, iv_step);
    1207              : 
    1208           18 :   if (final_iv)
    1209              :     {
    1210            0 :       gassign *assign;
    1211              :       /* If vectorizing an inverted early break loop we have to restart the
    1212              :          scalar loop at niters - vf.  This matches what we do in
    1213              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
    1214            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    1215              :         {
    1216            0 :           tree ftype = TREE_TYPE (orig_niters);
    1217            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    1218            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
    1219              :         }
    1220              :        else
    1221            0 :         assign = gimple_build_assign (final_iv, orig_niters);
    1222            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
    1223              :     }
    1224              : 
    1225           18 :   return cond_stmt;
    1226              : }
    1227              : 
    1228              : 
    1229              : /* Like vect_set_loop_condition, but handle the case in which the vector
    1230              :    loop handles exactly VF scalars per iteration.  */
    1231              : 
    1232              : static gcond *
    1233        62172 : vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
    1234              :                                 class loop *loop, tree niters, tree step,
    1235              :                                 tree final_iv, bool niters_maybe_zero,
    1236              :                                 gimple_stmt_iterator loop_cond_gsi)
    1237              : {
    1238        62172 :   tree indx_before_incr, indx_after_incr;
    1239        62172 :   gcond *cond_stmt;
    1240        62172 :   gcond *orig_cond;
    1241        62172 :   edge pe = loop_preheader_edge (loop);
    1242        62172 :   gimple_stmt_iterator incr_gsi;
    1243        62172 :   bool insert_after;
    1244        62172 :   enum tree_code code;
    1245        62172 :   tree niters_type = TREE_TYPE (niters);
    1246              : 
    1247        62172 :   orig_cond = get_loop_exit_condition (exit_edge);
    1248        62172 :   gcc_assert (orig_cond);
    1249        62172 :   loop_cond_gsi = gsi_for_stmt (orig_cond);
    1250              : 
    1251        62172 :   tree init, limit;
    1252        62172 :   if (!niters_maybe_zero && integer_onep (step))
    1253              :     {
    1254              :       /* In this case we can use a simple 0-based IV:
    1255              : 
    1256              :          A:
    1257              :            x = 0;
    1258              :            do
    1259              :              {
    1260              :                ...
    1261              :                x += 1;
    1262              :              }
    1263              :            while (x < NITERS);  */
    1264        62172 :       code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1265        62172 :       init = build_zero_cst (niters_type);
    1266        62172 :       limit = niters;
    1267              :     }
    1268              :   else
    1269              :     {
    1270              :       /* The following works for all values of NITERS except 0:
    1271              : 
    1272              :          B:
    1273              :            x = 0;
    1274              :            do
    1275              :              {
    1276              :                ...
    1277              :                x += STEP;
    1278              :              }
    1279              :            while (x <= NITERS - STEP);
    1280              : 
    1281              :          so that the loop continues to iterate if x + STEP - 1 < NITERS
    1282              :          but stops if x + STEP - 1 >= NITERS.
    1283              : 
    1284              :          However, if NITERS is zero, x never hits a value above NITERS - STEP
    1285              :          before wrapping around.  There are two obvious ways of dealing with
    1286              :          this:
    1287              : 
    1288              :          - start at STEP - 1 and compare x before incrementing it
    1289              :          - start at -1 and compare x after incrementing it
    1290              : 
    1291              :          The latter is simpler and is what we use.  The loop in this case
    1292              :          looks like:
    1293              : 
    1294              :          C:
    1295              :            x = -1;
    1296              :            do
    1297              :              {
    1298              :                ...
    1299              :                x += STEP;
    1300              :              }
    1301              :            while (x < NITERS - STEP);
    1302              : 
    1303              :          In both cases the loop limit is NITERS - STEP.  */
    1304            0 :       gimple_seq seq = NULL;
    1305            0 :       limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
    1306            0 :       limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
    1307            0 :       if (seq)
    1308              :         {
    1309            0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
    1310            0 :           gcc_assert (!new_bb);
    1311              :         }
    1312            0 :       if (niters_maybe_zero)
    1313              :         {
    1314              :           /* Case C.  */
    1315            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1316            0 :           init = build_all_ones_cst (niters_type);
    1317              :         }
    1318              :       else
    1319              :         {
    1320              :           /* Case B.  */
    1321            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
    1322            0 :           init = build_zero_cst (niters_type);
    1323              :         }
    1324              :     }
    1325              : 
    1326        62172 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1327        62172 :   create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
    1328              :              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
    1329        62172 :   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
    1330              :                                               true, NULL_TREE, true,
    1331              :                                               GSI_SAME_STMT);
    1332        62172 :   limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
    1333              :                                      true, GSI_SAME_STMT);
    1334              : 
    1335        62172 :   cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
    1336              :                                  NULL_TREE);
    1337              : 
    1338        62172 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1339              : 
    1340              :   /* Record the number of latch iterations.  */
    1341        62172 :   if (limit == niters)
    1342              :     /* Case A: the loop iterates NITERS times.  Subtract one to get the
    1343              :        latch count.  */
    1344        62172 :     loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
    1345              :                                        build_int_cst (niters_type, 1));
    1346              :   else
    1347              :     /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
    1348              :        Subtract one from this to get the latch count.  */
    1349            0 :     loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
    1350              :                                        limit, step);
    1351              : 
    1352        62172 :   if (final_iv)
    1353              :     {
    1354            0 :       gassign *assign;
    1355            0 :       gcc_assert (single_pred_p (exit_edge->dest));
    1356            0 :       tree phi_dest
    1357            0 :         = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
    1358              :       /* Make sure to maintain LC SSA form here and elide the subtraction
    1359              :          if the value is zero.  */
    1360            0 :       gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
    1361            0 :       add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
    1362            0 :       if (!integer_zerop (init))
    1363              :         {
    1364            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR,
    1365              :                                         phi_dest, init);
    1366            0 :           gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
    1367            0 :           gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
    1368              :         }
    1369              :     }
    1370              : 
    1371        62172 :   return cond_stmt;
    1372              : }
    1373              : 
    1374              : /* If we're using fully-masked loops, make LOOP iterate:
    1375              : 
    1376              :       N == (NITERS - 1) / STEP + 1
    1377              : 
    1378              :    times.  When NITERS is zero, this is equivalent to making the loop
    1379              :    execute (1 << M) / STEP times, where M is the precision of NITERS.
    1380              :    NITERS_MAYBE_ZERO is true if this last case might occur.
    1381              : 
    1382              :    If we're not using fully-masked loops, make LOOP iterate:
    1383              : 
    1384              :       N == (NITERS - STEP) / STEP + 1
    1385              : 
    1386              :    times, where NITERS is known to be outside the range [1, STEP - 1].
    1387              :    This is equivalent to making the loop execute NITERS / STEP times
    1388              :    when NITERS is nonzero and (1 << M) / STEP times otherwise.
    1389              :    NITERS_MAYBE_ZERO again indicates whether this last case might occur.
    1390              : 
    1391              :    If FINAL_IV is nonnull, it is an SSA name that should be set to
    1392              :    N * STEP on exit from the loop.
    1393              : 
    1394              :    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
    1395              : 
    1396              : void
    1397        62190 : vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
    1398              :                          tree niters, tree step, tree final_iv,
    1399              :                          bool niters_maybe_zero)
    1400              : {
    1401        62190 :   gcond *cond_stmt;
    1402        62190 :   gcond *orig_cond = get_loop_exit_condition (loop_e);
    1403        62190 :   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
    1404              : 
    1405              :   /* Check to see whether we will be replacing final_IV below.  Because of the
    1406              :      various replacement strategies (assign vs PHI) just remove it now and
    1407              :      leave the SSA name to be rebuild below.  */
    1408        62190 :   if (final_iv && TREE_CODE (final_iv) == SSA_NAME)
    1409              :     {
    1410            0 :       gimple *def = SSA_NAME_DEF_STMT (final_iv);
    1411            0 :       if (gimple_call_internal_p (def, IFN_VARYING))
    1412              :         {
    1413            0 :           gimple_stmt_iterator gsi = gsi_for_stmt (def);
    1414            0 :           gsi_remove (&gsi, true);
    1415              :         }
    1416              :     }
    1417              : 
    1418        62190 :   if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    1419              :     {
    1420           18 :       if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
    1421           18 :         cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
    1422              :                                                                     loop_vinfo,
    1423              :                                                                     niters, final_iv,
    1424              :                                                                     niters_maybe_zero,
    1425              :                                                                     loop_cond_gsi);
    1426              :       else
    1427            0 :         cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
    1428              :                                                              loop_vinfo,
    1429              :                                                              niters, final_iv,
    1430              :                                                              niters_maybe_zero,
    1431              :                                                              loop_cond_gsi);
    1432              :     }
    1433              :   else
    1434        62172 :     cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
    1435              :                                                 niters,
    1436              :                                                 step, final_iv,
    1437              :                                                 niters_maybe_zero,
    1438              :                                                 loop_cond_gsi);
    1439              : 
    1440              :   /* Remove old loop exit test.  */
    1441        62190 :   stmt_vec_info orig_cond_info;
    1442        62190 :   if (loop_vinfo
    1443        62190 :       && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
    1444        61760 :     loop_vinfo->remove_stmt (orig_cond_info);
    1445              :   else
    1446          430 :     gsi_remove (&loop_cond_gsi, true);
    1447              : 
    1448        62190 :   if (dump_enabled_p ())
    1449        11252 :     dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
    1450              :                      (gimple *) cond_stmt);
    1451        62190 : }
    1452              : 
    1453              : /* Get the virtual operand live on E.  The precondition on this is valid
    1454              :    immediate dominators and an actual virtual definition dominating E.  */
    1455              : /* ???  Costly band-aid.  For the use in question we can populate a
    1456              :    live-on-exit/end-of-BB virtual operand when copying stmts.  */
    1457              : 
    1458              : static tree
    1459           10 : get_live_virtual_operand_on_edge (edge e)
    1460              : {
    1461           10 :   basic_block bb = e->src;
    1462           22 :   do
    1463              :     {
    1464           61 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    1465              :         {
    1466           37 :           gimple *stmt = gsi_stmt (gsi);
    1467           55 :           if (gimple_vdef (stmt))
    1468           10 :             return gimple_vdef (stmt);
    1469           39 :           if (gimple_vuse (stmt))
    1470              :             return gimple_vuse (stmt);
    1471              :         }
    1472            8 :       if (gphi *vphi = get_virtual_phi (bb))
    1473            2 :         return gimple_phi_result (vphi);
    1474            6 :       bb = get_immediate_dominator (CDI_DOMINATORS, bb);
    1475            6 :     }
    1476              :   while (1);
    1477              : }
    1478              : 
    1479              : /* Given LOOP this function generates a new copy of it and puts it
    1480              :    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
    1481              :    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
    1482              :    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
    1483              :    entry or exit of LOOP.  If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
    1484              :    continuation.  This is correct for cases where one loop continues from the
    1485              :    other like in the vectorizer, but not true for uses in e.g. loop distribution
    1486              :    where the contents of the loop body are split but the iteration space of both
    1487              :    copies remains the same.
    1488              : 
    1489              :    If UPDATED_DOMS is not NULL it is update with the list of basic blocks whose
    1490              :    dominators were updated during the peeling.  When doing early break vectorization
    1491              :    then LOOP_VINFO needs to be provided and is used to keep track of any newly created
    1492              :    memory references that need to be updated should we decide to vectorize.  */
    1493              : 
    1494              : class loop *
    1495        34467 : slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
    1496              :                                         class loop *scalar_loop,
    1497              :                                         edge scalar_exit, edge e, edge *new_e,
    1498              :                                         bool flow_loops,
    1499              :                                         vec<basic_block> *updated_doms,
    1500              :                                         bool uncounted_p, bool create_main_e,
    1501              :                                         bool redirect_exits)
    1502              : {
    1503        34467 :   class loop *new_loop;
    1504        34467 :   basic_block *new_bbs, *bbs, *pbbs;
    1505        34467 :   bool at_exit;
    1506        34467 :   bool was_imm_dom;
    1507        34467 :   basic_block exit_dest;
    1508        34467 :   edge exit, new_exit;
    1509        34467 :   bool duplicate_outer_loop = false;
    1510              : 
    1511        34467 :   exit = loop_exit;
    1512        34467 :   at_exit = (e == exit);
    1513        34467 :   if (!at_exit && e != loop_preheader_edge (loop))
    1514              :     return NULL;
    1515              : 
    1516        34467 :   if (scalar_loop == NULL)
    1517              :     {
    1518        31942 :       scalar_loop = loop;
    1519        31942 :       scalar_exit = loop_exit;
    1520              :     }
    1521         2525 :   else if (scalar_loop == loop)
    1522            0 :     scalar_exit = loop_exit;
    1523              :   else
    1524              :     {
    1525              :       /* Loop has been version, match exits up using the aux index.  */
    1526         7575 :       for (edge exit : get_loop_exit_edges (scalar_loop))
    1527         2525 :         if (exit->aux == loop_exit->aux)
    1528              :           {
    1529         2525 :             scalar_exit = exit;
    1530         2525 :             break;
    1531         2525 :           }
    1532              : 
    1533         2525 :       gcc_assert (scalar_exit);
    1534              :     }
    1535              : 
    1536        34467 :   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1537        34467 :   pbbs = bbs + 1;
    1538        34467 :   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
    1539              :   /* Allow duplication of outer loops.  */
    1540        34467 :   if (scalar_loop->inner)
    1541          132 :     duplicate_outer_loop = true;
    1542              : 
    1543              :   /* Generate new loop structure.  */
    1544        34467 :   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
    1545        34467 :   duplicate_subloops (scalar_loop, new_loop);
    1546              : 
    1547        34467 :   exit_dest = exit->dest;
    1548        34467 :   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
    1549        34467 :                                           exit_dest) == exit->src ?
    1550              :                  true : false);
    1551              : 
    1552              :   /* Also copy the pre-header, this avoids jumping through hoops to
    1553              :      duplicate the loop entry PHI arguments.  Create an empty
    1554              :      pre-header unconditionally for this.  */
    1555        34467 :   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
    1556        34467 :   edge entry_e = single_pred_edge (preheader);
    1557        34467 :   bbs[0] = preheader;
    1558        34467 :   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1559              : 
    1560        34467 :   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
    1561              :             &scalar_exit, 1, &new_exit, NULL,
    1562              :             at_exit ? loop->latch : e->src, true);
    1563        34467 :   exit = loop_exit;
    1564        34467 :   basic_block new_preheader = new_bbs[0];
    1565              : 
    1566        34467 :   gcc_assert (new_exit);
    1567              : 
    1568              :   /* Record the new loop exit information.  new_loop doesn't have SCEV data and
    1569              :      so we must initialize the exit information.  */
    1570        34467 :   if (new_e)
    1571        33302 :     *new_e = new_exit;
    1572              : 
    1573              :   /* Before installing PHI arguments make sure that the edges
    1574              :      into them match that of the scalar loop we analyzed.  This
    1575              :      makes sure the SLP tree matches up between the main vectorized
    1576              :      loop and the epilogue vectorized copies.  */
    1577        34467 :   if (single_succ_edge (preheader)->dest_idx
    1578        34467 :       != single_succ_edge (new_bbs[0])->dest_idx)
    1579              :     {
    1580        29463 :       basic_block swap_bb = new_bbs[1];
    1581        29463 :       gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1582        29463 :       std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1583        29463 :       EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1584        29463 :       EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1585              :     }
    1586        34467 :   if (duplicate_outer_loop)
    1587              :     {
    1588          132 :       class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
    1589          132 :       if (loop_preheader_edge (scalar_loop)->dest_idx
    1590          132 :           != loop_preheader_edge (new_inner_loop)->dest_idx)
    1591              :         {
    1592           99 :           basic_block swap_bb = new_inner_loop->header;
    1593           99 :           gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1594           99 :           std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1595           99 :           EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1596           99 :           EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1597              :         }
    1598              :     }
    1599              : 
    1600        34467 :   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
    1601              : 
    1602              :   /* Skip new preheader since it's deleted if copy loop is added at entry.  */
    1603       179586 :   for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    1604       110652 :     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
    1605              : 
    1606              :   /* Rename the exit uses.  */
    1607       139028 :   for (edge exit : get_loop_exit_edges (new_loop))
    1608        35627 :     for (auto gsi = gsi_start_phis (exit->dest);
    1609        83962 :          !gsi_end_p (gsi); gsi_next (&gsi))
    1610              :       {
    1611        48335 :         tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
    1612        48335 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
    1613        48335 :         if (MAY_HAVE_DEBUG_BIND_STMTS)
    1614        23099 :           adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
    1615        34467 :       }
    1616              : 
    1617        34467 :   auto loop_exits = get_loop_exit_edges (loop);
    1618        34467 :   bool has_multiple_exits_p = loop_exits.length () > 1;
    1619              : 
    1620              :   /* If REDIRECT_EXITS is false we leave the alternative exits untouched and
    1621              :      treat the duplication as if the loop only had the main exit.  */
    1622        34467 :   bool redirect_multiple_exits_p = redirect_exits && has_multiple_exits_p;
    1623        34467 :   auto_vec<basic_block> doms;
    1624              : 
    1625        34467 :   if (at_exit) /* Add the loop copy at exit.  */
    1626              :     {
    1627        32872 :       if (scalar_loop != loop && new_exit->dest != exit_dest)
    1628              :         {
    1629         2521 :           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
    1630         2521 :           flush_pending_stmts (new_exit);
    1631              :         }
    1632              : 
    1633        32872 :       bool need_virtual_phi = get_virtual_phi (loop->header);
    1634              : 
    1635              :       /* For the main loop exit preserve the LC PHI nodes.  For vectorization
    1636              :          we need them to continue or finalize reductions.  Since we do not
    1637              :          copy the loop exit blocks we have to materialize PHIs at the
    1638              :          new destination before redirecting edges.  */
    1639        32872 :       for (auto gsi_from = gsi_start_phis (loop_exit->dest);
    1640        78408 :            !gsi_end_p (gsi_from); gsi_next (&gsi_from))
    1641              :         {
    1642        45536 :           tree res = gimple_phi_result (*gsi_from);
    1643        45536 :           create_phi_node (copy_ssa_name (res), new_preheader);
    1644              :         }
    1645        32872 :       edge e = redirect_edge_and_branch (loop_exit, new_preheader);
    1646        32872 :       gcc_assert (e == loop_exit);
    1647        32872 :       flush_pending_stmts (loop_exit);
    1648        32872 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
    1649              : 
    1650              :       /* If we ended up choosing an exit leading to a path not using memory
    1651              :          we can end up without a virtual LC PHI.  Create it when it is
    1652              :          needed because of the epilog loop continuation.  */
    1653        32872 :       if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
    1654              :         {
    1655            8 :           tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
    1656            8 :           gphi *vphi = create_phi_node (copy_ssa_name (header_def),
    1657              :                                         new_preheader);
    1658            8 :           add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
    1659              :                        loop_exit, UNKNOWN_LOCATION);
    1660              :         }
    1661              : 
    1662        32872 :       basic_block main_loop_exit_block = new_preheader;
    1663        32872 :       basic_block alt_loop_exit_block = new_preheader;
    1664              :       /* When we redirect the other exits create the CFG
    1665              :          below to funnel everything through the merge block:
    1666              :                    | loop_exit               | alt1   | altN
    1667              :                    v                         v   ...  v
    1668              :             main_loop_exit_block:       alt_loop_exit_block:
    1669              :                    |                      /
    1670              :                    v                     v
    1671              :             new_preheader:
    1672              :          where in the new preheader we need merge PHIs for
    1673              :          the continuation values into the epilogue header.
    1674              :          Do not bother with exit PHIs for the early exits but
    1675              :          their live virtual operand.  We'll fix up things below.  */
    1676        32872 :       if (redirect_multiple_exits_p || uncounted_p)
    1677              :         {
    1678          644 :           edge loop_e = single_succ_edge (new_preheader);
    1679          644 :           new_preheader = split_edge (loop_e);
    1680              : 
    1681          644 :         if (redirect_exits)
    1682              :           {
    1683          638 :             gphi *vphi = NULL;
    1684          638 :             alt_loop_exit_block = new_preheader;
    1685         3312 :             for (auto exit : loop_exits)
    1686         1398 :               if (exit != loop_exit)
    1687              :                 {
    1688          760 :                   tree vphi_def = NULL_TREE;
    1689          760 :                   if (gphi *evphi = get_virtual_phi (exit->dest))
    1690          452 :                     vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
    1691          760 :                   edge res
    1692          760 :                     = redirect_edge_and_branch (exit, alt_loop_exit_block);
    1693          760 :                   gcc_assert (res == exit);
    1694          760 :                   redirect_edge_var_map_clear (exit);
    1695              : 
    1696          760 :                   if (alt_loop_exit_block == new_preheader)
    1697          615 :                     alt_loop_exit_block = split_edge (exit);
    1698          760 :                   if (!need_virtual_phi)
    1699          316 :                     continue;
    1700              : 
    1701              :                   /* When the edge has no virtual LC PHI get at the live
    1702              :                      virtual operand by other means.  */
    1703          444 :                   if (!vphi_def)
    1704            2 :                     vphi_def = get_live_virtual_operand_on_edge (exit);
    1705              : 
    1706          444 :                   if (!vphi)
    1707          410 :                     vphi = create_phi_node (copy_ssa_name (vphi_def),
    1708              :                                           alt_loop_exit_block);
    1709              :                   else
    1710              :                     /* Edge redirection might re-allocate the PHI node
    1711              :                        so we have to rediscover it.  */
    1712           34 :                     vphi = get_virtual_phi (alt_loop_exit_block);
    1713          444 :                   add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
    1714              :                 }
    1715              :           }
    1716              : 
    1717          644 :           set_immediate_dominator (CDI_DOMINATORS, new_preheader,
    1718              :                                    loop->header);
    1719              : 
    1720              :           /* Fix up the profile counts of the new exit blocks.
    1721              :              main_loop_exit_block was created by duplicating the
    1722              :              preheader, so needs its count scaling according to the main
    1723              :              exit edge's probability.  The remaining count from the
    1724              :              preheader goes to the alt_loop_exit_block, since all
    1725              :              alternative exits have been redirected there.  */
    1726          644 :           main_loop_exit_block->count = loop_exit->count ();
    1727          644 :           alt_loop_exit_block->count
    1728          644 :             = preheader->count - main_loop_exit_block->count;
    1729              :         }
    1730              : 
    1731              :       /* Adjust the epilog loop PHI entry values to continue iteration.
    1732              :          This adds remaining necessary LC PHI nodes to the main exit
    1733              :          and creates merge PHIs when we have multiple exits with
    1734              :          their appropriate continuation.  */
    1735        32872 :       if (flow_loops)
    1736              :         {
    1737        32872 :           edge loop_entry = single_succ_edge (new_preheader);
    1738        32872 :           bool peeled_iters = (uncounted_p
    1739        32872 :                                || single_pred (loop->latch) != loop_exit->src);
    1740              : 
    1741              :           /* Record the new SSA names in the cache so that we can skip
    1742              :              materializing them again when we fill in the rest of the LC SSA
    1743              :              variables.  */
    1744        32872 :           hash_map <tree, tree> new_phi_args;
    1745        32872 :           for (auto psi = gsi_start_phis (main_loop_exit_block);
    1746        78416 :                !gsi_end_p (psi); gsi_next (&psi))
    1747              :             {
    1748        45544 :               gphi *phi = *psi;
    1749        45544 :               tree new_arg = gimple_phi_arg_def_from_edge (phi, loop_exit);
    1750        45544 :               if (TREE_CODE (new_arg) != SSA_NAME)
    1751          313 :                 continue;
    1752              : 
    1753              :               /* If the loop doesn't have a virtual def then only possibly keep
    1754              :                  the epilog LC PHI for it and avoid creating new defs.  */
    1755        45321 :               if (virtual_operand_p (new_arg) && !need_virtual_phi)
    1756              :                 {
    1757           90 :                   auto gsi = gsi_for_stmt (phi);
    1758           90 :                   remove_phi_node (&gsi, true);
    1759           90 :                   continue;
    1760           90 :                 }
    1761              : 
    1762              :               /* If we decided not to remove the PHI node we should also not
    1763              :                  rematerialize it later on.  */
    1764        45231 :               new_phi_args.put (new_arg, gimple_phi_result (phi));
    1765              :             }
    1766              : 
    1767              :           /* Create the merge PHI nodes in new_preheader and populate the
    1768              :              arguments for the exits.  */
    1769        32872 :           if (redirect_multiple_exits_p)
    1770              :             {
    1771          615 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1772          615 :                    gsi_to = gsi_start_phis (new_loop->header);
    1773         2236 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1774         1621 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1775              :                 {
    1776         1621 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1777         1621 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1778              : 
    1779              :                   /* When the vector loop is peeled then we need to use the
    1780              :                      value at start of the loop, otherwise the main loop exit
    1781              :                      should use the final iter value.  */
    1782         1621 :                   tree new_arg;
    1783         1621 :                   if (peeled_iters)
    1784           71 :                     new_arg = gimple_phi_result (from_phi);
    1785              :                   else
    1786         1550 :                     new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1787              :                                                      loop_latch_edge (loop));
    1788              : 
    1789              :                   /* Check if we've already created a new phi node during edge
    1790              :                      redirection and re-use it if so.  Otherwise create a
    1791              :                      LC PHI node to feed the merge PHI.  */
    1792         1621 :                   tree *res;
    1793         3242 :                   if (virtual_operand_p (new_arg))
    1794              :                     {
    1795              :                       /* Use the existing virtual LC SSA from exit block.  */
    1796          410 :                       gphi *vphi = get_virtual_phi (main_loop_exit_block);
    1797          410 :                       new_arg = gimple_phi_result (vphi);
    1798              :                     }
    1799         1211 :                   else if ((res = new_phi_args.get (new_arg)))
    1800          105 :                     new_arg = *res;
    1801              :                   else
    1802              :                     {
    1803              :                       /* Create the LC PHI node for the exit.  */
    1804         1106 :                       tree new_def = copy_ssa_name (new_arg);
    1805         1106 :                       gphi *lc_phi
    1806         1106 :                           = create_phi_node (new_def, main_loop_exit_block);
    1807         1106 :                       SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
    1808         1106 :                       new_arg = new_def;
    1809              :                     }
    1810              : 
    1811              :                   /* Create the PHI node in the merge block merging the
    1812              :                      main and early exit values.  */
    1813         1621 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1814         1621 :                   gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
    1815         1621 :                   edge main_e = single_succ_edge (main_loop_exit_block);
    1816         1621 :                   SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg);
    1817              : 
    1818              :                   /* And adjust the epilog entry value.  */
    1819         1621 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1820              :                 }
    1821              :             }
    1822              : 
    1823        32872 :           if (redirect_multiple_exits_p)
    1824              :             {
    1825              :               /* After creating the merge PHIs handle the early exits those
    1826              :                  should use the values at the start of the loop.  */
    1827          615 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1828          615 :                    gsi_to = gsi_start_phis (new_preheader);
    1829         2236 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1830         1621 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1831              :                 {
    1832         1621 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1833         1621 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1834              : 
    1835              :                   /* Now update the virtual PHI nodes with the right value.  */
    1836         1621 :                   tree alt_arg = gimple_phi_result (from_phi);
    1837         3242 :                   if (virtual_operand_p (alt_arg))
    1838              :                     {
    1839          410 :                       gphi *vphi = get_virtual_phi (alt_loop_exit_block);
    1840          410 :                       alt_arg = gimple_phi_result (vphi);
    1841              :                     }
    1842              :                   /* For other live args we didn't create LC PHI nodes.
    1843              :                      Do so here.  */
    1844              :                   else
    1845              :                     {
    1846         1211 :                       tree alt_def = copy_ssa_name (alt_arg);
    1847         1211 :                       gphi *lc_phi
    1848         1211 :                         = create_phi_node (alt_def, alt_loop_exit_block);
    1849         2711 :                       for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
    1850              :                            ++i)
    1851         1500 :                         SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
    1852              :                       alt_arg = alt_def;
    1853              :                     }
    1854              : 
    1855              :                   /* The merge PHIs live in NEW_PREHEADER; their
    1856              :                      alternative argument always comes from the
    1857              :                      successor edge of ALT_LOOP_EXIT_BLOCK.  */
    1858         1621 :                   edge alt_e = single_succ_edge (alt_loop_exit_block);
    1859         1621 :                   SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
    1860              :                 }
    1861              :             }
    1862              : 
    1863              :           /* For the single exit case only create the missing LC PHI nodes
    1864              :              for the continuation of the loop IVs that are not also already
    1865              :              reductions and thus had LC PHI nodes on the exit already.  When
    1866              :              we are not redirecting the alternative exits the layout is:
    1867              : 
    1868              :                    loop_exit ---> new_preheader ---> epilog
    1869              :                    alt_exit ---------------> original dest
    1870              :            */
    1871          615 :           if (!redirect_multiple_exits_p)
    1872              :             {
    1873        32257 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1874        32257 :                    gsi_to = gsi_start_phis (new_loop->header);
    1875       122931 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1876        90674 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1877              :                 {
    1878        90674 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1879        90674 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1880        90674 :                   tree new_arg;
    1881              : 
    1882              :                   /* Use the value on the exiting path.  When the exit is from
    1883              :                      the latch edge we want the post-iteration value on that
    1884              :                      edge; when the exit is from the loop header (before the
    1885              :                      latch ever executes) we must use the current header value,
    1886              :                      otherwise we pick up a name that was never defined.  */
    1887        90674 :                   if (!has_multiple_exits_p && !uncounted_p)
    1888        90393 :                     new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1889              :                                                      loop_latch_edge (loop));
    1890              :                   else
    1891          281 :                     new_arg = gimple_phi_result (from_phi);
    1892              : 
    1893              :                   /* Re-use the virtual LC PHI we already built when we are not
    1894              :                      redirecting the other exits to avoid creating duplicate
    1895              :                      virtual SSA names.  */
    1896       181348 :                   if (virtual_operand_p (new_arg))
    1897              :                     {
    1898        24478 :                       if (gphi *vphi = get_virtual_phi (main_loop_exit_block))
    1899              :                         {
    1900        24478 :                           adjust_phi_and_debug_stmts (to_phi, loop_entry,
    1901              :                                                       gimple_phi_result (vphi));
    1902        42532 :                           continue;
    1903              :                         }
    1904              :                     }
    1905              : 
    1906              :                   /* Check if we've already created a new phi node during edge
    1907              :                      redirection.  If we have, only propagate the value
    1908              :                      downwards.  */
    1909        66196 :                   if (tree *res = new_phi_args.get (new_arg))
    1910              :                     {
    1911              :                       /* Check if the new dest block already contains a use.  */
    1912        18054 :                       gimple *stmt = SSA_NAME_DEF_STMT (*res);
    1913              : 
    1914              :                       /* If the value already exist, just update the destination
    1915              :                          and if it doesn't we want a new node.  */
    1916        18054 :                       if (gimple_bb (stmt) == main_loop_exit_block)
    1917              :                         {
    1918        18054 :                           adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
    1919        18054 :                           continue;
    1920              :                         }
    1921              :                       else
    1922            0 :                         new_arg = *res;
    1923              :                     }
    1924              : 
    1925        48142 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1926        48142 :                   gphi *lcssa_phi = create_phi_node (new_res, main_loop_exit_block);
    1927        48142 :                   SET_PHI_ARG_DEF (lcssa_phi, loop_exit->dest_idx, new_arg);
    1928        48142 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1929              :                 }
    1930              :             }
    1931        32872 :         }
    1932              : 
    1933        32872 :       if (was_imm_dom || duplicate_outer_loop)
    1934        32598 :         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
    1935              : 
    1936              :       /* And remove the non-necessary forwarder again.  Keep the other
    1937              :          one so we have a proper pre-header for the loop at the exit edge.  */
    1938        32872 :       redirect_edge_pred (single_succ_edge (preheader),
    1939              :                           single_pred (preheader));
    1940        32872 :       delete_basic_block (preheader);
    1941        32872 :       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1942        32872 :                                loop_preheader_edge (scalar_loop)->src);
    1943              : 
    1944              :       /* Finally after wiring the new epilogue we need to update its main exit
    1945              :          to the original function exit we recorded.  Other exits are already
    1946              :          correct.  */
    1947        32872 :       if (has_multiple_exits_p || uncounted_p)
    1948              :         {
    1949          827 :           class loop *update_loop = new_loop;
    1950          827 :           doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
    1951        19217 :           for (unsigned i = 0; i < doms.length (); ++i)
    1952        18390 :             if (flow_bb_inside_loop_p (loop, doms[i]))
    1953         2585 :               doms.unordered_remove (i);
    1954              : 
    1955         4251 :           for (edge e : get_loop_exit_edges (update_loop))
    1956              :             {
    1957         1770 :               edge ex;
    1958         1770 :               edge_iterator ei;
    1959         3583 :               FOR_EACH_EDGE (ex, ei, e->dest->succs)
    1960              :                 {
    1961              :                   /* Find the first non-fallthrough block as fall-throughs can't
    1962              :                      dominate other blocks.  */
    1963         1813 :                   if (single_succ_p (ex->dest))
    1964              :                     {
    1965          957 :                       doms.safe_push (ex->dest);
    1966          957 :                       ex = single_succ_edge (ex->dest);
    1967              :                     }
    1968         1813 :                   doms.safe_push (ex->dest);
    1969              :                 }
    1970         1770 :               doms.safe_push (e->dest);
    1971          827 :             }
    1972              : 
    1973          827 :           iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    1974          827 :           if (updated_doms)
    1975          827 :             updated_doms->safe_splice (doms);
    1976              :         }
    1977              :     }
    1978              :   else /* Add the copy at entry.  */
    1979              :     {
    1980              :       /* Copy the current loop LC PHI nodes between the original loop exit
    1981              :          block and the new loop header.  This allows us to later split the
    1982              :          preheader block and still find the right LC nodes.  */
    1983         1595 :       if (flow_loops)
    1984          430 :         for (auto gsi_from = gsi_start_phis (new_loop->header),
    1985          430 :              gsi_to = gsi_start_phis (loop->header);
    1986         1349 :              !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1987          919 :              gsi_next (&gsi_from), gsi_next (&gsi_to))
    1988              :           {
    1989          919 :             gimple *from_phi = gsi_stmt (gsi_from);
    1990          919 :             gimple *to_phi = gsi_stmt (gsi_to);
    1991          919 :             tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1992              :                                                   loop_latch_edge (new_loop));
    1993          919 :             adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
    1994              :                                         new_arg);
    1995              :           }
    1996              : 
    1997         1595 :       if (scalar_loop != loop)
    1998              :         {
    1999              :           /* Remove the non-necessary forwarder of scalar_loop again.  */
    2000            4 :           redirect_edge_pred (single_succ_edge (preheader),
    2001              :                               single_pred (preheader));
    2002            4 :           delete_basic_block (preheader);
    2003            4 :           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    2004            4 :                                    loop_preheader_edge (scalar_loop)->src);
    2005            4 :           preheader = split_edge (loop_preheader_edge (loop));
    2006            4 :           entry_e = single_pred_edge (preheader);
    2007              :         }
    2008              : 
    2009         1595 :       redirect_edge_and_branch_force (entry_e, new_preheader);
    2010         1595 :       flush_pending_stmts (entry_e);
    2011         1595 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
    2012              : 
    2013              : 
    2014              :       /* `vect_set_loop_condition' replaces the condition in the main exit of
    2015              :          loop.  For counted loops, this is the IV counting exit, so in the case
    2016              :          of the prolog loop, we are replacing the old IV counting exit limit of
    2017              :          total loop niters for the new limit of the prolog niters, as desired.
    2018              :          For uncounted loops, we don't have an IV-counting exit to replace, so
    2019              :          we add a dummy exit to be consumed by `vect_set_loop_condition' later
    2020              :          on.  */
    2021         1595 :       if (create_main_e)
    2022              :         {
    2023           31 :           edge to_latch_e = single_pred_edge (new_loop->latch);
    2024           31 :           bool latch_is_false = to_latch_e->flags & EDGE_FALSE_VALUE ? true
    2025              :                                                                      : false;
    2026              : 
    2027              :           /* Add new bb for duplicate exit.  */
    2028           31 :           basic_block bbcond = split_edge (to_latch_e);
    2029           31 :           gimple_stmt_iterator a = gsi_last_bb (bbcond);
    2030              : 
    2031              :           /* Fix flags for the edge leading to the latch.  */
    2032           31 :           to_latch_e = find_edge (bbcond, new_loop->latch);
    2033           31 :           to_latch_e->flags &= ~EDGE_FALLTHRU;
    2034           31 :           to_latch_e->flags |= latch_is_false ? EDGE_FALSE_VALUE
    2035              :                                               : EDGE_TRUE_VALUE;
    2036              : 
    2037              :           /* Build the condition.  */
    2038           31 :           tree cone = build_int_cst (sizetype, 1);
    2039           31 :           tree czero = build_int_cst (sizetype, 0);
    2040           31 :           gcond *cond_copy = gimple_build_cond (NE_EXPR, cone, czero, NULL_TREE,
    2041              :                                                 NULL_TREE);
    2042              : 
    2043           31 :           gsi_insert_after (&a, cond_copy, GSI_NEW_STMT);
    2044              : 
    2045              :           /* Add edge for exiting the loop via new condition.  */
    2046           38 :           edge dup_exit = make_edge (bbcond, new_exit->dest, latch_is_false
    2047              :                                 ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE);
    2048              : 
    2049           31 :           profile_probability probability = profile_probability::even ();
    2050           31 :           to_latch_e->probability = dup_exit->probability = probability;
    2051              : 
    2052           31 :           set_immediate_dominator (CDI_DOMINATORS, dup_exit->src,
    2053              :                                    new_exit->src);
    2054           31 :           new_exit = dup_exit;
    2055           31 :           *new_e = new_exit;
    2056              :         }
    2057              : 
    2058         1595 :       redirect_edge_and_branch_force (new_exit, preheader);
    2059         1595 :       flush_pending_stmts (new_exit);
    2060         1595 :       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
    2061              : 
    2062              :       /* And remove the non-necessary forwarder again.  Keep the other
    2063              :          one so we have a proper pre-header for the loop at the exit edge.  */
    2064         1595 :       redirect_edge_pred (single_succ_edge (new_preheader),
    2065              :                           single_pred (new_preheader));
    2066         1595 :       delete_basic_block (new_preheader);
    2067         1595 :       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
    2068         1595 :                                loop_preheader_edge (new_loop)->src);
    2069              : 
    2070              :       /* Update dominators for multiple exits.  */
    2071         1595 :       if (has_multiple_exits_p || create_main_e)
    2072              :         {
    2073         1113 :           for (edge alt_e : loop_exits)
    2074              :             {
    2075          441 :               if ((alt_e == loop_exit) && !create_main_e)
    2076          193 :                 continue;
    2077          248 :               basic_block old_dom
    2078          248 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    2079          248 :               if (flow_bb_inside_loop_p (loop, old_dom))
    2080              :                 {
    2081          103 :                   auto_vec<basic_block, 8> queue;
    2082          103 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    2083          337 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    2084          234 :                     if (!flow_bb_inside_loop_p (loop, son))
    2085          131 :                       queue.safe_push (son);
    2086          440 :                   for (auto son : queue)
    2087          131 :                     set_immediate_dominator (CDI_DOMINATORS,
    2088              :                                              son, get_bb_copy (old_dom));
    2089          103 :                 }
    2090              :             }
    2091              :         }
    2092              : 
    2093              :       /* When loop_exit != scalar_exit due to if-conversion loop versioning,
    2094              :          the `scalar_exit' now has two incoming edges, one from the if-converted
    2095              :          and one from the peeled prolog loop.  It is therefore dominated by a
    2096              :          common block between these.  Update its dominator accordingly.  */
    2097          224 :       if (create_main_e && loop_exit != scalar_exit)
    2098            0 :         set_immediate_dominator (CDI_DOMINATORS, scalar_exit->dest,
    2099              :                                  recompute_dominator (CDI_DOMINATORS,
    2100              :                                                       scalar_exit->dest));
    2101              :     }
    2102              : 
    2103        34467 :   free (new_bbs);
    2104        34467 :   free (bbs);
    2105              : 
    2106        34467 :   checking_verify_dominators (CDI_DOMINATORS);
    2107              : 
    2108        34467 :   return new_loop;
    2109        34467 : }
    2110              : 
    2111              : 
    2112              : /* Given the condition expression COND, put it as the last statement of
    2113              :    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
    2114              :    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
    2115              :    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
    2116              :    new edge as irreducible if IRREDUCIBLE_P is true.  */
    2117              : 
    2118              : static edge
    2119        50663 : slpeel_add_loop_guard (basic_block guard_bb, tree cond,
    2120              :                        basic_block guard_to, basic_block dom_bb,
    2121              :                        profile_probability probability, bool irreducible_p)
    2122              : {
    2123        50663 :   gimple_stmt_iterator gsi;
    2124        50663 :   edge new_e, enter_e;
    2125        50663 :   gcond *cond_stmt;
    2126        50663 :   gimple_seq gimplify_stmt_list = NULL;
    2127              : 
    2128        50663 :   enter_e = EDGE_SUCC (guard_bb, 0);
    2129        50663 :   enter_e->flags &= ~EDGE_FALLTHRU;
    2130        50663 :   enter_e->flags |= EDGE_FALSE_VALUE;
    2131        50663 :   gsi = gsi_last_bb (guard_bb);
    2132              : 
    2133        50663 :   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
    2134              :                                  is_gimple_condexpr_for_cond, NULL_TREE);
    2135        50663 :   if (gimplify_stmt_list)
    2136        22873 :     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
    2137              : 
    2138        50663 :   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
    2139        50663 :   gsi = gsi_last_bb (guard_bb);
    2140        50663 :   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
    2141              : 
    2142              :   /* Add new edge to connect guard block to the merge/loop-exit block.  */
    2143        50663 :   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
    2144              : 
    2145        50663 :   new_e->probability = probability;
    2146        50663 :   if (irreducible_p)
    2147           16 :     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
    2148              : 
    2149        50663 :   enter_e->probability = probability.invert ();
    2150        50663 :   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
    2151              : 
    2152              :   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
    2153        50663 :   if (enter_e->dest->loop_father->header == enter_e->dest)
    2154          476 :     split_edge (enter_e);
    2155              : 
    2156        50663 :   return new_e;
    2157              : }
    2158              : 
    2159              : 
    2160              : /* This function verifies that the following restrictions apply to LOOP:
    2161              :    (1) it consists of exactly 2 basic blocks - header, and an empty latch
    2162              :        for innermost loop and 5 basic blocks for outer-loop.
    2163              :    (2) it is single entry, single exit
    2164              :    (3) its exit condition is the last stmt in the header
    2165              :    (4) E is the entry/exit edge of LOOP.
    2166              :  */
    2167              : 
    2168              : bool
    2169       495902 : slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
    2170              :                              const_edge e)
    2171              : {
    2172       495902 :   edge entry_e = loop_preheader_edge (loop);
    2173       495902 :   gcond *orig_cond = get_loop_exit_condition (exit_e);
    2174       495902 :   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
    2175              : 
    2176              :   /* All loops have an outer scope; the only case loop->outer is NULL is for
    2177              :      the function itself.  */
    2178       495902 :   if (!loop_outer (loop)
    2179       495902 :       || !empty_block_p (loop->latch)
    2180              :       || !exit_e
    2181              :       /* Verify that new loop exit condition can be trivially modified.  */
    2182       495902 :       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
    2183       991804 :       || (e != exit_e && e != entry_e))
    2184            0 :     return false;
    2185              : 
    2186       495902 :   basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
    2187       495902 :   get_loop_body_with_size (loop, bbs, loop->num_nodes);
    2188       495902 :   bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
    2189       495902 :   free (bbs);
    2190       495902 :   return ret;
    2191              : }
    2192              : 
    2193              : /* Function find_loop_location.
    2194              : 
    2195              :    Extract the location of the loop in the source code.
    2196              :    If the loop is not well formed for vectorization, an estimated
    2197              :    location is calculated.
    2198              :    Return the loop location if succeed and NULL if not.  */
    2199              : 
    2200              : dump_user_location_t
    2201      3549643 : find_loop_location (class loop *loop)
    2202              : {
    2203      3549643 :   gimple *stmt = NULL;
    2204      3549643 :   basic_block bb;
    2205      3549643 :   gimple_stmt_iterator si;
    2206              : 
    2207      3549643 :   if (!loop)
    2208            0 :     return dump_user_location_t ();
    2209              : 
    2210              :   /* For the root of the loop tree return the function location.  */
    2211      3549643 :   if (!loop_outer (loop))
    2212            0 :     return dump_user_location_t::from_function_decl (cfun->decl);
    2213              : 
    2214      3549643 :   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
    2215              :     {
    2216              :       /* We only care about the loop location, so use any exit with location
    2217              :          information.  */
    2218     11138344 :       for (edge e : get_loop_exit_edges (loop))
    2219              :         {
    2220      3644791 :           stmt = get_loop_exit_condition (e);
    2221              : 
    2222      3644791 :           if (stmt
    2223      3644791 :               && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2224      3095758 :             return stmt;
    2225      3549643 :         }
    2226              :     }
    2227              : 
    2228              :   /* If we got here the loop is probably not "well formed",
    2229              :      try to estimate the loop location */
    2230              : 
    2231       453885 :   if (!loop->header)
    2232            0 :     return dump_user_location_t ();
    2233              : 
    2234       453885 :   bb = loop->header;
    2235              : 
    2236      1610089 :   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    2237              :     {
    2238      1036733 :       stmt = gsi_stmt (si);
    2239      1036733 :       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2240       334414 :         return stmt;
    2241              :     }
    2242              : 
    2243       119471 :   return dump_user_location_t ();
    2244              : }
    2245              : 
    2246              : /* Return true if the phi described by STMT_INFO defines an IV of the
    2247              :    loop to be vectorized.  */
    2248              : 
    2249              : static bool
    2250      1331193 : iv_phi_p (stmt_vec_info stmt_info)
    2251              : {
    2252      1331193 :   gphi *phi = as_a <gphi *> (stmt_info->stmt);
    2253      2662386 :   if (virtual_operand_p (PHI_RESULT (phi)))
    2254              :     return false;
    2255              : 
    2256      1054644 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
    2257      1054644 :       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    2258       157323 :     return false;
    2259              : 
    2260              :   return true;
    2261              : }
    2262              : 
    2263              : /* Return true if vectorizer can peel for nonlinear iv.  */
    2264              : static bool
    2265         7780 : vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
    2266              :                               stmt_vec_info stmt_info)
    2267              : {
    2268         7780 :   enum vect_induction_op_type induction_type
    2269              :     = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
    2270         7780 :   tree niters_skip;
    2271              :   /* Init_expr will be update by vect_update_ivs_after_vectorizer,
    2272              :      if niters or vf is unknown:
    2273              :      For shift, when shift mount >= precision, there would be UD.
    2274              :      For mult, don't known how to generate
    2275              :      init_expr * pow (step, niters) for variable niters.
    2276              :      For neg unknown niters are ok, since niters of vectorized main loop
    2277              :      will always be multiple of 2.
    2278              :      See also PR113163,  PR114196 and PR114485.  */
    2279         7780 :   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    2280         7780 :       || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    2281         7780 :       || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2282         3114 :           && induction_type != vect_step_op_neg))
    2283              :     {
    2284         2930 :       if (dump_enabled_p ())
    2285           12 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2286              :                          "Peeling for epilogue is not supported"
    2287              :                          " for this nonlinear induction"
    2288              :                          " when iteration count is unknown or"
    2289              :                          " when using partial vectorization.\n");
    2290         2930 :       return false;
    2291              :     }
    2292              : 
    2293         4850 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    2294          266 :       && induction_type == vect_step_op_mul)
    2295              :     {
    2296           24 :       if (dump_enabled_p ())
    2297            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2298              :                          "Peeling for is not supported for nonlinear mult"
    2299              :                          " induction using partial vectorization.\n");
    2300           24 :       return false;
    2301              :     }
    2302              : 
    2303              :   /* Avoid compile time hog on vect_peel_nonlinear_iv_init.  */
    2304         4584 :   if (induction_type == vect_step_op_mul)
    2305              :     {
    2306          409 :       tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
    2307          409 :       tree type = TREE_TYPE (step_expr);
    2308              : 
    2309          812 :       if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
    2310          409 :           && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
    2311              :         {
    2312            6 :           if (dump_enabled_p ())
    2313            6 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2314              :                              "Avoid compile time hog on"
    2315              :                              " vect_peel_nonlinear_iv_init"
    2316              :                              " for nonlinear induction vec_step_op_mul"
    2317              :                              " when iteration count is too big.\n");
    2318            6 :           return false;
    2319              :         }
    2320              :     }
    2321              : 
    2322              :   /* Also doesn't support peel for neg when niter is variable.
    2323              :      ??? generate something like niter_expr & 1 ? init_expr : -init_expr?  */
    2324         4820 :   niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
    2325         4820 :   if ((niters_skip != NULL_TREE
    2326            0 :        && (TREE_CODE (niters_skip) != INTEGER_CST
    2327            0 :            || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
    2328         4820 :       || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
    2329         4820 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
    2330              :     {
    2331            4 :       if (dump_enabled_p ())
    2332            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2333              :                          "Peeling for alignment is not supported"
    2334              :                          " for nonlinear induction when niters_skip"
    2335              :                          " is not constant.\n");
    2336            4 :       return false;
    2337              :     }
    2338              : 
    2339              :   return true;
    2340              : }
    2341              : 
    2342              : /* Function vect_can_advance_ivs_p
    2343              : 
    2344              :    In case the number of iterations that LOOP iterates is unknown at compile
    2345              :    time, an epilog loop will be generated, and the loop induction variables
    2346              :    (IVs) will be "advanced" to the value they are supposed to take just before
    2347              :    the epilog loop.  Here we check that the access function of the loop IVs
    2348              :    and the expression that represents the loop bound are simple enough.
    2349              :    These restrictions will be relaxed in the future.  */
    2350              : 
    2351              : bool
    2352       499427 : vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
    2353              : {
    2354       499427 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2355       499427 :   basic_block bb = loop->header;
    2356       499427 :   gphi_iterator gsi;
    2357              : 
    2358              :   /* Analyze phi functions of the loop header.  */
    2359              : 
    2360       499427 :   if (dump_enabled_p ())
    2361        25196 :     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
    2362      1732706 :   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2363              :     {
    2364      1237277 :       tree evolution_part;
    2365      1237277 :       enum vect_induction_op_type induction_type;
    2366              : 
    2367      1237277 :       gphi *phi = gsi.phi ();
    2368      1237277 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    2369      1237277 :       if (dump_enabled_p ())
    2370        72263 :         dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
    2371              :                          phi_info->stmt);
    2372              : 
    2373              :       /* Skip virtual phi's. The data dependences that are associated with
    2374              :          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
    2375              : 
    2376              :          Skip reduction phis.  */
    2377      1237277 :       if (!iv_phi_p (phi_info))
    2378              :         {
    2379       390842 :           if (dump_enabled_p ())
    2380        25668 :             dump_printf_loc (MSG_NOTE, vect_location,
    2381              :                              "reduc or virtual phi. skip.\n");
    2382       390842 :           continue;
    2383              :         }
    2384              : 
    2385       846435 :       induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
    2386       846435 :       if (induction_type != vect_step_op_add)
    2387              :         {
    2388         7780 :           if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
    2389              :             return false;
    2390              : 
    2391         4816 :           continue;
    2392              :         }
    2393              : 
    2394              :       /* Analyze the evolution function.  */
    2395              : 
    2396       838655 :       evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    2397       838655 :       if (evolution_part == NULL_TREE)
    2398              :         {
    2399         1002 :           if (dump_enabled_p ())
    2400           81 :             dump_printf (MSG_MISSED_OPTIMIZATION,
    2401              :                          "No access function or evolution.\n");
    2402         1002 :           return false;
    2403              :         }
    2404              : 
    2405              :       /* FORNOW: We do not transform initial conditions of IVs
    2406              :          which evolution functions are not invariants in the loop.  */
    2407              : 
    2408       837653 :       if (!expr_invariant_in_loop_p (loop, evolution_part))
    2409              :         {
    2410           32 :           if (dump_enabled_p ())
    2411            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2412              :                              "evolution not invariant in loop.\n");
    2413           32 :           return false;
    2414              :         }
    2415              : 
    2416              :       /* FORNOW: We do not transform initial conditions of IVs
    2417              :          which evolution functions are a polynomial of degree >= 2.  */
    2418              : 
    2419      2070900 :       if (tree_is_chrec (evolution_part))
    2420              :         {
    2421            0 :           if (dump_enabled_p ())
    2422            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2423              :                              "evolution is chrec.\n");
    2424            0 :           return false;
    2425              :         }
    2426              :     }
    2427              : 
    2428              :   return true;
    2429              : }
    2430              : 
    2431              : 
    2432              : /*   Function vect_update_ivs_after_vectorizer.
    2433              : 
    2434              :      "Advance" the induction variables of LOOP to the value they should take
    2435              :      after the execution of LOOP.  This is currently necessary because the
    2436              :      vectorizer does not handle induction variables that are used after the
    2437              :      loop.  Such a situation occurs when the last iterations of LOOP are
    2438              :      peeled, because:
    2439              :      1. We introduced new uses after LOOP for IVs that were not originally used
    2440              :         after LOOP: the IVs of LOOP are now used by an epilog loop.
    2441              :      2. LOOP is going to be vectorized; this means that it will iterate N/VF
    2442              :         times, whereas the loop IVs should be bumped N times.
    2443              : 
    2444              :      Input:
    2445              :      - LOOP - a loop that is going to be vectorized. The last few iterations
    2446              :               of LOOP were peeled.
    2447              :      - NITERS - the number of iterations that LOOP executes (before it is
    2448              :                 vectorized). i.e, the number of times the ivs should be bumped.
    2449              :      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
    2450              :                   coming out from LOOP on which there are uses of the LOOP ivs
    2451              :                   (this is the path from LOOP->exit to epilog_loop->preheader).
    2452              : 
    2453              :                   The new definitions of the ivs are placed in LOOP->exit.
    2454              :                   The phi args associated with the edge UPDATE_E in the bb
    2455              :                   UPDATE_E->dest are updated accordingly.
    2456              : 
    2457              :      - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
    2458              :                       the main latch exit.
    2459              : 
    2460              :      Assumption 1: Like the rest of the vectorizer, this function assumes
    2461              :      a single loop exit that has a single predecessor.
    2462              : 
    2463              :      Assumption 2: The phi nodes in the LOOP header and in update_bb are
    2464              :      organized in the same order.
    2465              : 
    2466              :      Assumption 3: The access function of the ivs is simple enough (see
    2467              :      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
    2468              : 
    2469              :      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
    2470              :      coming out of LOOP on which the ivs of LOOP are used (this is the path
    2471              :      that leads to the epilog loop; other paths skip the epilog loop).  This
    2472              :      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
    2473              :      needs to have its phis updated.
    2474              :  */
    2475              : 
    2476              : static void
    2477        33487 : vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
    2478              :                                   tree niters, edge update_e,
    2479              :                                   bool early_exit_p)
    2480              : {
    2481        33487 :   gphi_iterator gsi, gsi1;
    2482        33487 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2483        33487 :   basic_block update_bb = update_e->dest;
    2484        33487 :   basic_block exit_bb = update_e->src;
    2485              :   /* Check to see if this is an empty loop pre-header block.  If it exists
    2486              :      we need to use the edge from that block -> loop header for updates but
    2487              :      must use the original exit_bb to add any new adjustment because there
    2488              :      can be a skip_epilog edge bypassing the epilog and so the loop pre-header
    2489              :      too.  */
    2490        33562 :   if (empty_block_p (update_bb) && single_succ_p (update_bb))
    2491              :     {
    2492           75 :       update_e = single_succ_edge (update_bb);
    2493           75 :       update_bb = update_e->dest;
    2494              :     }
    2495        33487 :   gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
    2496              : 
    2497        33487 :   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
    2498       127403 :        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
    2499        93916 :        gsi_next (&gsi), gsi_next (&gsi1))
    2500              :     {
    2501        93916 :       tree init_expr;
    2502        93916 :       tree step_expr, off;
    2503        93916 :       tree type;
    2504        93916 :       tree var, ni, ni_name;
    2505              : 
    2506        93916 :       gphi *phi = gsi.phi ();
    2507        93916 :       gphi *phi1 = gsi1.phi ();
    2508        93916 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    2509        93916 :       if (dump_enabled_p ())
    2510        12239 :         dump_printf_loc (MSG_NOTE, vect_location,
    2511              :                          "vect_update_ivs_after_vectorizer: phi: %G",
    2512              :                          (gimple *) phi);
    2513              : 
    2514              :       /* Skip reduction and virtual phis.  */
    2515        93916 :       if (!iv_phi_p (phi_info))
    2516              :         {
    2517        43030 :           if (dump_enabled_p ())
    2518         4762 :             dump_printf_loc (MSG_NOTE, vect_location,
    2519              :                              "reduc or virtual phi. skip.\n");
    2520        43030 :           continue;
    2521              :         }
    2522              : 
    2523        50886 :       type = TREE_TYPE (gimple_phi_result (phi));
    2524        50886 :       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    2525        50886 :       step_expr = unshare_expr (step_expr);
    2526              : 
    2527              :       /* FORNOW: We do not support IVs whose evolution function is a polynomial
    2528              :          of degree >= 2 or exponential.  */
    2529        50886 :       gcc_assert (!tree_is_chrec (step_expr));
    2530              : 
    2531        50886 :       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
    2532        50886 :       gimple_seq stmts = NULL;
    2533        50886 :       enum vect_induction_op_type induction_type
    2534              :         = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
    2535              : 
    2536        50886 :       if (induction_type == vect_step_op_add)
    2537              :         {
    2538        50758 :           tree stype = TREE_TYPE (step_expr);
    2539        50758 :           off = fold_build2 (MULT_EXPR, stype,
    2540              :                                fold_convert (stype, niters), step_expr);
    2541              : 
    2542        50758 :           if (POINTER_TYPE_P (type))
    2543         3586 :             ni = fold_build_pointer_plus (init_expr, off);
    2544              :           else
    2545        47172 :             ni = fold_convert (type,
    2546              :                                fold_build2 (PLUS_EXPR, stype,
    2547              :                                             fold_convert (stype, init_expr),
    2548              :                                             off));
    2549              :         }
    2550              :       /* Don't bother call vect_peel_nonlinear_iv_init.  */
    2551          128 :       else if (induction_type == vect_step_op_neg)
    2552              :         ni = init_expr;
    2553              :       else
    2554           84 :         ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
    2555              :                                           niters, step_expr,
    2556              :                                           induction_type, early_exit_p);
    2557              : 
    2558        50886 :       var = create_tmp_var (type, "tmp");
    2559              : 
    2560        50886 :       gimple_seq new_stmts = NULL;
    2561        50886 :       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
    2562              : 
    2563              :       /* Exit_bb shouldn't be empty, but we also can't insert after a ctrl
    2564              :          statements.  */
    2565        50886 :       if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
    2566              :         {
    2567          253 :           gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
    2568          253 :           gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
    2569              :         }
    2570              :       else
    2571              :         {
    2572        50633 :           gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
    2573        50633 :           gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
    2574              :         }
    2575              : 
    2576              :       /* Update the PHI argument on the requested edge.  */
    2577        50886 :       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    2578              :     }
    2579        33487 : }
    2580              : 
    2581              : /* Return a gimple value containing the misalignment (measured in vector
    2582              :    elements) for the loop described by LOOP_VINFO, i.e. how many elements
    2583              :    it is away from a perfectly aligned address.  Add any new statements
    2584              :    to SEQ.  */
    2585              : 
    2586              : static tree
    2587          204 : get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
    2588              : {
    2589          204 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    2590          204 :   stmt_vec_info stmt_info = dr_info->stmt;
    2591          204 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2592              : 
    2593          204 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    2594          204 :   unsigned HOST_WIDE_INT target_align_c;
    2595          204 :   tree target_align_minus_1;
    2596              : 
    2597          204 :   bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    2598          204 :                                         size_zero_node) < 0;
    2599          204 :   tree offset = (negative
    2600          204 :                  ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    2601              :                              * TREE_INT_CST_LOW
    2602              :                                  (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
    2603          201 :                  : size_zero_node);
    2604          204 :   tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
    2605              :                                                           stmt_info, seq,
    2606              :                                                           offset);
    2607          204 :   tree type = unsigned_type_for (TREE_TYPE (start_addr));
    2608          204 :   if (target_align.is_constant (&target_align_c))
    2609          204 :     target_align_minus_1 = build_int_cst (type, target_align_c - 1);
    2610              :   else
    2611              :     {
    2612              :       tree vla = build_int_cst (type, target_align);
    2613              :       target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
    2614              :                                           build_int_cst (type, 1));
    2615              :     }
    2616              : 
    2617          204 :   HOST_WIDE_INT elem_size
    2618          204 :     = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    2619          408 :   tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
    2620              : 
    2621              :   /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
    2622          204 :   tree int_start_addr = fold_convert (type, start_addr);
    2623          204 :   tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
    2624              :                                         target_align_minus_1);
    2625              : 
    2626              :   /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
    2627          204 :   tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
    2628              :                                         elem_size_log);
    2629              : 
    2630          204 :   return misalign_in_elems;
    2631              : }
    2632              : 
    2633              : /* Function vect_gen_prolog_loop_niters
    2634              : 
    2635              :    Generate the number of iterations which should be peeled as prolog for the
    2636              :    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
    2637              :    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
    2638              :    As a result, after the execution of this loop, the data reference DR will
    2639              :    refer to an aligned location.  The following computation is generated:
    2640              : 
    2641              :    If the misalignment of DR is known at compile time:
    2642              :      addr_mis = int mis = DR_MISALIGNMENT (dr);
    2643              :    Else, compute address misalignment in bytes:
    2644              :      addr_mis = addr & (target_align - 1)
    2645              : 
    2646              :    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
    2647              : 
    2648              :    (elem_size = element type size; an element is the scalar element whose type
    2649              :    is the inner type of the vectype)
    2650              : 
    2651              :    The computations will be emitted at the end of BB.  We also compute and
    2652              :    store upper bound (included) of the result in BOUND.
    2653              : 
    2654              :    When the step of the data-ref in the loop is not 1 (as in interleaved data
    2655              :    and SLP), the number of iterations of the prolog must be divided by the step
    2656              :    (which is equal to the size of interleaved group).
    2657              : 
    2658              :    The above formulas assume that VF == number of elements in the vector. This
    2659              :    may not hold when there are multiple-types in the loop.
    2660              :    In this case, for some data-references in the loop the VF does not represent
    2661              :    the number of elements that fit in the vector.  Therefore, instead of VF we
    2662              :    use TYPE_VECTOR_SUBPARTS.  */
    2663              : 
    2664              : static tree
    2665          430 : vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
    2666              :                              basic_block bb, poly_int64 *bound)
    2667              : {
    2668          430 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    2669          430 :   tree var;
    2670          430 :   tree niters_type
    2671          430 :     = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) ? sizetype
    2672          430 :                                                  : TREE_TYPE (LOOP_VINFO_NITERS
    2673              :                                                               (loop_vinfo));
    2674          430 :   gimple_seq stmts = NULL, new_stmts = NULL;
    2675          430 :   tree iters, iters_name;
    2676          430 :   stmt_vec_info stmt_info = dr_info->stmt;
    2677          430 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2678          430 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    2679              : 
    2680          430 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    2681              :     {
    2682          226 :       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    2683              : 
    2684          226 :       if (dump_enabled_p ())
    2685          217 :         dump_printf_loc (MSG_NOTE, vect_location,
    2686              :                          "known peeling = %d.\n", npeel);
    2687              : 
    2688          226 :       iters = build_int_cst (niters_type, npeel);
    2689          226 :       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    2690              :     }
    2691              :   else
    2692              :     {
    2693          204 :       tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
    2694          204 :       tree type = TREE_TYPE (misalign_in_elems);
    2695          204 :       HOST_WIDE_INT elem_size
    2696          204 :         = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    2697              :       /* We only do prolog peeling if the target alignment is known at compile
    2698              :          time.  */
    2699          204 :       poly_uint64 align_in_elems =
    2700          204 :         exact_div (target_align, elem_size);
    2701          204 :       tree align_in_elems_minus_1 =
    2702          204 :         build_int_cst (type, align_in_elems - 1);
    2703          204 :       tree align_in_elems_tree = build_int_cst (type, align_in_elems);
    2704              : 
    2705              :       /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
    2706              :                                  & (align_in_elems - 1)).  */
    2707          204 :       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    2708          204 :                                             size_zero_node) < 0;
    2709          204 :       if (negative)
    2710            3 :         iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
    2711              :                              align_in_elems_tree);
    2712              :       else
    2713          201 :         iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
    2714              :                              misalign_in_elems);
    2715          204 :       iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
    2716          204 :       iters = fold_convert (niters_type, iters);
    2717          204 :       *bound = align_in_elems;
    2718              :     }
    2719              : 
    2720          430 :   if (dump_enabled_p ())
    2721          278 :     dump_printf_loc (MSG_NOTE, vect_location,
    2722              :                      "niters for prolog loop: %T\n", iters);
    2723              : 
    2724          430 :   var = create_tmp_var (niters_type, "prolog_loop_niters");
    2725          430 :   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
    2726              : 
    2727          430 :   if (new_stmts)
    2728          204 :     gimple_seq_add_seq (&stmts, new_stmts);
    2729          430 :   if (stmts)
    2730              :     {
    2731          204 :       gcc_assert (single_succ_p (bb));
    2732          204 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
    2733          204 :       if (gsi_end_p (gsi))
    2734           42 :         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    2735              :       else
    2736          162 :         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    2737              :     }
    2738          430 :   return iters_name;
    2739              : }
    2740              : 
    2741              : 
    2742              : /* Function vect_update_init_of_dr
    2743              : 
    2744              :    If CODE is PLUS, the vector loop starts NITERS iterations after the
    2745              :    scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
    2746              :    iterations before the scalar one (using masking to skip inactive
    2747              :    elements).  This function updates the information recorded in DR to
    2748              :    account for the difference.  Specifically, it updates the OFFSET
    2749              :    field of DR_INFO.  */
    2750              : 
    2751              : static void
    2752        24365 : vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
    2753              : {
    2754        24365 :   struct data_reference *dr = dr_info->dr;
    2755        24365 :   tree offset = dr_info->offset;
    2756        24365 :   if (!offset)
    2757        24365 :     offset = build_zero_cst (sizetype);
    2758              : 
    2759        24365 :   niters = fold_build2 (MULT_EXPR, sizetype,
    2760              :                         fold_convert (sizetype, niters),
    2761              :                         fold_convert (sizetype, DR_STEP (dr)));
    2762        24365 :   offset = fold_build2 (code, sizetype,
    2763              :                         fold_convert (sizetype, offset), niters);
    2764        24365 :   dr_info->offset = offset;
    2765        24365 : }
    2766              : 
    2767              : 
    2768              : /* Function vect_update_inits_of_drs
    2769              : 
    2770              :    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    2771              :    CODE and NITERS are as for vect_update_inits_of_dr.  */
    2772              : 
    2773              : void
    2774         7278 : vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
    2775              :                           tree_code code)
    2776              : {
    2777         7278 :   unsigned int i;
    2778         7278 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2779         7278 :   struct data_reference *dr;
    2780              : 
    2781         7278 :   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
    2782              : 
    2783              :   /* Adjust niters to sizetype.  We used to insert the stmts on loop preheader
    2784              :      here, but since we might use these niters to update the epilogues niters
    2785              :      and data references we can't insert them here as this definition might not
    2786              :      always dominate its uses.  */
    2787         7278 :   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    2788         4587 :     niters = fold_convert (sizetype, niters);
    2789              : 
    2790        39228 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2791              :     {
    2792        24792 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2793        24792 :       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
    2794        24365 :           && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
    2795        24365 :         vect_update_init_of_dr (dr_info, niters, code);
    2796              :     }
    2797         7278 : }
    2798              : 
    2799              : /* For the information recorded in LOOP_VINFO prepare the loop for peeling
    2800              :    by masking.  This involves calculating the number of iterations to
    2801              :    be peeled and then aligning all memory references appropriately.  */
    2802              : 
    2803              : void
    2804            1 : vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
    2805              : {
    2806            1 :   tree misalign_in_elems;
    2807            1 :   tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
    2808              : 
    2809            1 :   gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
    2810              : 
    2811              :   /* From the information recorded in LOOP_VINFO get the number of iterations
    2812              :      that need to be skipped via masking.  */
    2813            1 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    2814              :     {
    2815            1 :       poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    2816            1 :                              - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
    2817            1 :       misalign_in_elems = build_int_cst (type, misalign);
    2818              :     }
    2819              :   else
    2820              :     {
    2821            0 :       gimple_seq seq1 = NULL, seq2 = NULL;
    2822            0 :       misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
    2823            0 :       misalign_in_elems = fold_convert (type, misalign_in_elems);
    2824            0 :       misalign_in_elems = force_gimple_operand (misalign_in_elems,
    2825              :                                                 &seq2, true, NULL_TREE);
    2826            0 :       gimple_seq_add_seq (&seq1, seq2);
    2827            0 :       if (seq1)
    2828              :         {
    2829            0 :           edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2830            0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
    2831            0 :           gcc_assert (!new_bb);
    2832              :         }
    2833              :     }
    2834              : 
    2835            1 :   if (dump_enabled_p ())
    2836            1 :     dump_printf_loc (MSG_NOTE, vect_location,
    2837              :                      "misalignment for fully-masked loop: %T\n",
    2838              :                      misalign_in_elems);
    2839              : 
    2840            1 :   LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
    2841              : 
    2842            1 :   vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
    2843            1 : }
    2844              : 
    2845              : /* This function builds ni_name = number of iterations.  Statements
    2846              :    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
    2847              :    it to TRUE if new ssa_var is generated.  */
    2848              : 
    2849              : tree
    2850        62233 : vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
    2851              : {
    2852        62233 :   if (LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
    2853              :     return NULL_TREE;
    2854        62159 :   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
    2855        62159 :   if (TREE_CODE (ni) == INTEGER_CST)
    2856              :     return ni;
    2857              :   else
    2858              :     {
    2859        25976 :       tree ni_name, var;
    2860        25976 :       gimple_seq stmts = NULL;
    2861        25976 :       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2862              : 
    2863        25976 :       var = create_tmp_var (TREE_TYPE (ni), "niters");
    2864        25976 :       ni_name = force_gimple_operand (ni, &stmts, false, var);
    2865        25976 :       if (stmts)
    2866              :         {
    2867        24917 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    2868        24917 :           if (new_var_p != NULL)
    2869          214 :             *new_var_p = true;
    2870              :         }
    2871              : 
    2872        25976 :       return ni_name;
    2873              :     }
    2874              : }
    2875              : 
    2876              : /* Calculate the number of iterations above which vectorized loop will be
    2877              :    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
    2878              :    of prolog loop.  If it's integer const, the integer number is also passed
    2879              :    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (inclusive) of the
    2880              :    number of iterations of the prolog loop.  BOUND_EPILOG is the corresponding
    2881              :    value for the epilog loop.  If CHECK_PROFITABILITY is true, TH is the
    2882              :    threshold below which the scalar (rather than vectorized) loop will be
    2883              :    executed.  This function stores the upper bound (inclusive) of the result
    2884              :    in BOUND_SCALAR.  */
    2885              : 
    2886              : static tree
    2887        24959 : vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
    2888              :                              poly_int64 bound_prolog, poly_int64 bound_epilog,
    2889              :                              int th, poly_uint64 *bound_scalar,
    2890              :                              bool check_profitability)
    2891              : {
    2892        24959 :   tree type = TREE_TYPE (niters_prolog);
    2893        24959 :   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
    2894              :                              build_int_cst (type, bound_epilog));
    2895              : 
    2896        24959 :   *bound_scalar = bound_prolog + bound_epilog;
    2897        24959 :   if (check_profitability)
    2898              :     {
    2899              :       /* TH indicates the minimum niters of vectorized loop, while we
    2900              :          compute the maximum niters of scalar loop.  */
    2901        16033 :       th--;
    2902              :       /* Peeling for constant times.  */
    2903        16033 :       if (int_niters_prolog >= 0)
    2904              :         {
    2905        15995 :           *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
    2906        15995 :           return build_int_cst (type, *bound_scalar);
    2907              :         }
    2908              :       /* Peeling an unknown number of times.  Note that both BOUND_PROLOG
    2909              :          and BOUND_EPILOG are inclusive upper bounds.  */
    2910           38 :       if (known_ge (th, bound_prolog + bound_epilog))
    2911              :         {
    2912            0 :           *bound_scalar = th;
    2913            0 :           return build_int_cst (type, th);
    2914              :         }
    2915              :       /* Need to do runtime comparison.  */
    2916           38 :       else if (maybe_gt (th, bound_epilog))
    2917              :         {
    2918           38 :           *bound_scalar = upper_bound (*bound_scalar, th);
    2919           38 :           return fold_build2 (MAX_EXPR, type,
    2920              :                               build_int_cst (type, th), niters);
    2921              :         }
    2922              :     }
    2923              :   return niters;
    2924              : }
    2925              : 
    2926              : /* NITERS is the number of times that the original scalar loop executes
    2927              :    after peeling.  Work out the maximum number of iterations N that can
    2928              :    be handled by the vectorized form of the loop and then either:
    2929              : 
    2930              :    a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
    2931              : 
    2932              :         niters_vector = N
    2933              : 
    2934              :    b) set *STEP_VECTOR_PTR to one and generate:
    2935              : 
    2936              :         niters_vector = N / vf
    2937              : 
    2938              :    In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
    2939              :    any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
    2940              :    is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).
    2941              : 
    2942              :    Case (a) is used for LOOP_VINFO_USING_PARTIAL_VECTORS_P or if VF is
    2943              :    variable.  As stated above, NITERS_VECTOR then equals the number
    2944              :    of scalar iterations and vect_set_loop_condition will handle the
    2945              :    step.  As opposed to (b) we don't know anything about NITER_VECTOR's
    2946              :    range here.
    2947              : */
    2948              : 
    2949              : void
    2950        33622 : vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
    2951              :                              tree *niters_vector_ptr, tree *step_vector_ptr,
    2952              :                              bool niters_no_overflow)
    2953              : {
    2954        33622 :   tree ni_minus_gap, var;
    2955        33622 :   tree niters_vector, step_vector;
    2956        33622 :   tree type = niters ? TREE_TYPE (niters) : sizetype;
    2957        33622 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2958        33622 :   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2959              : 
    2960              :   /* If epilogue loop is required because of data accesses with gaps, we
    2961              :      subtract one iteration from the total number of iterations here for
    2962              :      correct calculation of RATIO.  */
    2963        33622 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    2964              :     {
    2965          377 :       ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
    2966              :                                   build_one_cst (type));
    2967          377 :       if (!is_gimple_val (ni_minus_gap))
    2968              :         {
    2969          175 :           var = create_tmp_var (type, "ni_gap");
    2970          175 :           gimple *stmts = NULL;
    2971          175 :           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
    2972              :                                                true, var);
    2973          175 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    2974              :         }
    2975              :     }
    2976              :   else
    2977              :     ni_minus_gap = niters;
    2978              : 
    2979              :   /* To silence some unexpected warnings, simply initialize to 0. */
    2980        33622 :   unsigned HOST_WIDE_INT const_vf = 0;
    2981        33622 :   if (vf.is_constant (&const_vf)
    2982        33622 :       && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    2983              :     {
    2984              :       /* Create: niters / vf, which is equivalent to niters >> log2(vf) when
    2985              :                  vf is a power of two, and when not we approximate using a
    2986              :                  truncating division.  */
    2987              :       /* If it's known that niters == number of latch executions + 1 doesn't
    2988              :          overflow, we can generate niters / vf; otherwise we generate
    2989              :          (niters - vf) / vf + 1 by using the fact that we know ratio
    2990              :          will be at least one.  */
    2991        33604 :       tree var_vf = build_int_cst (type, const_vf);
    2992        33604 :       if (niters_no_overflow)
    2993        33427 :         niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
    2994              :                                      var_vf);
    2995              :       else
    2996          177 :         niters_vector
    2997          177 :           = fold_build2 (PLUS_EXPR, type,
    2998              :                          fold_build2 (TRUNC_DIV_EXPR, type,
    2999              :                                       fold_build2 (MINUS_EXPR, type,
    3000              :                                                    ni_minus_gap,
    3001              :                                                    var_vf),
    3002              :                                       var_vf),
    3003              :                          build_int_cst (type, 1));
    3004        33604 :       step_vector = build_one_cst (type);
    3005              :     }
    3006              :   else
    3007              :     {
    3008           18 :       niters_vector = ni_minus_gap;
    3009           18 :       step_vector = build_int_cst (type, vf);
    3010              :     }
    3011              : 
    3012        33622 :   if (!is_gimple_val (niters_vector))
    3013              :     {
    3014        25754 :       var = create_tmp_var (type, "bnd");
    3015        25754 :       gimple_seq stmts = NULL;
    3016        25754 :       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
    3017        25754 :       gsi_insert_seq_on_edge_immediate (pe, stmts);
    3018              :       /* Peeling algorithm guarantees that vector loop bound is at least ONE,
    3019              :          we set range information to make niters analyzer's life easier.
    3020              :          Note the number of latch iteration value can be TYPE_MAX_VALUE so
    3021              :          we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
    3022        25754 :       if (stmts != NULL
    3023        25754 :           && integer_onep (step_vector))
    3024              :         {
    3025        25739 :           if (niters_no_overflow)
    3026              :             {
    3027        25580 :               int_range<1> vr (type,
    3028        51160 :                                wi::one (TYPE_PRECISION (type)),
    3029        51160 :                                wi::div_trunc (wi::max_value
    3030        25580 :                                               (TYPE_PRECISION (type),
    3031        25580 :                                                TYPE_SIGN (type)),
    3032              :                                               const_vf,
    3033        51160 :                                               TYPE_SIGN (type)));
    3034        25580 :               set_range_info (niters_vector, vr);
    3035        25580 :             }
    3036              :           /* For VF == 1 the vector IV might also overflow so we cannot
    3037              :              assert a minimum value of 1.  */
    3038          159 :           else if (const_vf > 1)
    3039              :             {
    3040          123 :               int_range<1> vr (type,
    3041          246 :                                wi::one (TYPE_PRECISION (type)),
    3042          369 :                                wi::rshift (wi::max_value (TYPE_PRECISION (type),
    3043          123 :                                                           TYPE_SIGN (type))
    3044          246 :                                            - (const_vf - 1),
    3045          246 :                                            exact_log2 (const_vf), TYPE_SIGN (type))
    3046          492 :                                + 1);
    3047          123 :               set_range_info (niters_vector, vr);
    3048          123 :             }
    3049              :         }
    3050              :     }
    3051        33622 :   *niters_vector_ptr = niters_vector;
    3052        33622 :   *step_vector_ptr = step_vector;
    3053              : 
    3054        33622 :   return;
    3055              : }
    3056              : 
    3057              : /* Given NITERS_VECTOR which is the number of iterations for vectorized
    3058              :    loop specified by LOOP_VINFO after vectorization, compute the number
    3059              :    of iterations before vectorization (niters_vector * vf) and store it
    3060              :    to NITERS_VECTOR_MULT_VF_PTR.  */
    3061              : 
    3062              : static void
    3063        32829 : vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
    3064              :                                      tree niters_vector,
    3065              :                                      tree *niters_vector_mult_vf_ptr)
    3066              : {
    3067              :   /* We should be using a step_vector of VF if VF is variable.  */
    3068        32829 :   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
    3069        32829 :   tree type = TREE_TYPE (niters_vector);
    3070        32829 :   tree tree_vf = build_int_cst (type, vf);
    3071        32829 :   basic_block exit_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3072              : 
    3073        32829 :   gcc_assert (niters_vector_mult_vf_ptr != NULL);
    3074        32829 :   tree niters_vector_mult_vf = fold_build2 (MULT_EXPR, type,
    3075              :                                             niters_vector, tree_vf);
    3076              : 
    3077              :   /* If we've peeled a vector iteration then subtract one full vector
    3078              :      iteration.  */
    3079        32829 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3080           19 :     niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
    3081              :                                          niters_vector_mult_vf, tree_vf);
    3082              : 
    3083        32829 :   if (!is_gimple_val (niters_vector_mult_vf))
    3084              :     {
    3085        24984 :       tree var = create_tmp_var (type, "niters_vector_mult_vf");
    3086        24984 :       gimple_seq stmts = NULL;
    3087        24984 :       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
    3088              :                                                     &stmts, true, var);
    3089        24984 :       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
    3090        24984 :       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    3091              :     }
    3092        32829 :   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
    3093        32829 : }
    3094              : 
    3095              : /* Function slpeel_add_loop_guard adds guard skipping from the beginning
    3096              :    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
    3097              :    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
    3098              :    appear like below:
    3099              : 
    3100              :        guard_bb:
    3101              :          if (cond)
    3102              :            goto merge_bb;
    3103              :          else
    3104              :            goto skip_loop;
    3105              : 
    3106              :      skip_loop:
    3107              :        header_a:
    3108              :          i_1 = PHI<i_0, i_2>;
    3109              :          ...
    3110              :          i_2 = i_1 + 1;
    3111              :          if (cond_a)
    3112              :            goto latch_a;
    3113              :          else
    3114              :            goto exit_a;
    3115              :        latch_a:
    3116              :          goto header_a;
    3117              : 
    3118              :        exit_a:
    3119              :          i_5 = PHI<i_2>;
    3120              : 
    3121              :        merge_bb:
    3122              :          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
    3123              : 
    3124              :      update_loop:
    3125              :        header_b:
    3126              :          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
    3127              :          ...
    3128              :          i_4 = i_3 + 1;
    3129              :          if (cond_b)
    3130              :            goto latch_b;
    3131              :          else
    3132              :            goto exit_bb;
    3133              :        latch_b:
    3134              :          goto header_b;
    3135              : 
    3136              :        exit_bb:
    3137              : 
    3138              :    This function creates PHI nodes at merge_bb and replaces the use of i_5
    3139              :    in the update_loop's PHI node with the result of new PHI result.  */
    3140              : 
    3141              : static void
    3142        25389 : slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
    3143              :                                     class loop *update_loop,
    3144              :                                     edge guard_edge, edge merge_edge)
    3145              : {
    3146        25389 :   location_t merge_loc, guard_loc;
    3147        25389 :   edge orig_e = loop_preheader_edge (skip_loop);
    3148        25389 :   edge update_e = loop_preheader_edge (update_loop);
    3149        25389 :   gphi_iterator gsi_orig, gsi_update;
    3150              : 
    3151        25389 :   for ((gsi_orig = gsi_start_phis (skip_loop->header),
    3152        25389 :         gsi_update = gsi_start_phis (update_loop->header));
    3153        92877 :        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
    3154        67488 :        gsi_next (&gsi_orig), gsi_next (&gsi_update))
    3155              :     {
    3156        67488 :       gphi *orig_phi = gsi_orig.phi ();
    3157        67488 :       gphi *update_phi = gsi_update.phi ();
    3158              : 
    3159              :       /* Generate new phi node at merge bb of the guard.  */
    3160        67488 :       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    3161        67488 :       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
    3162              : 
    3163              :       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
    3164              :          args in NEW_PHI for these edges.  */
    3165        67488 :       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
    3166        67488 :       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
    3167        67488 :       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
    3168        67488 :       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
    3169        67488 :       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
    3170        67488 :       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
    3171              : 
    3172              :       /* Update phi in UPDATE_PHI.  */
    3173        67488 :       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    3174              :     }
    3175        25389 : }
    3176              : 
    3177              : /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
    3178              :    Return a value that equals:
    3179              : 
    3180              :    - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
    3181              :    - SKIP_VALUE when the main loop is skipped.  */
    3182              : 
    3183              : tree
    3184         3888 : vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
    3185              :                            tree skip_value)
    3186              : {
    3187         3888 :   gcc_assert (loop_vinfo->main_loop_edge);
    3188              : 
    3189         3888 :   tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
    3190         3888 :   basic_block bb = loop_vinfo->main_loop_edge->dest;
    3191         3888 :   gphi *new_phi = create_phi_node (phi_result, bb);
    3192         3888 :   add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
    3193              :                UNKNOWN_LOCATION);
    3194         3888 :   add_phi_arg (new_phi, skip_value,
    3195              :                loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
    3196         3888 :   return phi_result;
    3197              : }
    3198              : 
    3199              : /* Function vect_do_peeling.
    3200              : 
    3201              :    Input:
    3202              :    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
    3203              : 
    3204              :        preheader:
    3205              :      LOOP:
    3206              :        header_bb:
    3207              :          loop_body
    3208              :          if (exit_loop_cond) goto exit_bb
    3209              :          else                goto header_bb
    3210              :        exit_bb:
    3211              : 
    3212              :    - NITERS: The number of iterations of the loop.
    3213              :    - NITERSM1: The number of iterations of the loop's latch.
    3214              :    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
    3215              :    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
    3216              :                               CHECK_PROFITABILITY is true.
    3217              :    Output:
    3218              :    - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
    3219              :      iterate after vectorization; see vect_set_loop_condition for details.
    3220              :    - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
    3221              :      should be set to the number of scalar iterations handled by the
    3222              :      vector loop.  The SSA name is only used on exit from the loop.
    3223              : 
    3224              :    This function peels prolog and epilog from the loop, adds guards skipping
    3225              :    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
    3226              :    would look like:
    3227              : 
    3228              :        guard_bb_1:
    3229              :          if (prefer_scalar_loop) goto merge_bb_1
    3230              :          else                    goto guard_bb_2
    3231              : 
    3232              :        guard_bb_2:
    3233              :          if (skip_prolog) goto merge_bb_2
    3234              :          else             goto prolog_preheader
    3235              : 
    3236              :        prolog_preheader:
    3237              :      PROLOG:
    3238              :        prolog_header_bb:
    3239              :          prolog_body
    3240              :          if (exit_prolog_cond) goto prolog_exit_bb
    3241              :          else                  goto prolog_header_bb
    3242              :        prolog_exit_bb:
    3243              : 
    3244              :        merge_bb_2:
    3245              : 
    3246              :        vector_preheader:
    3247              :      VECTOR LOOP:
    3248              :        vector_header_bb:
    3249              :          vector_body
    3250              :          if (exit_vector_cond) goto vector_exit_bb
    3251              :          else                  goto vector_header_bb
    3252              :        vector_exit_bb:
    3253              : 
    3254              :        guard_bb_3:
    3255              :          if (skip_epilog) goto merge_bb_3
    3256              :          else             goto epilog_preheader
    3257              : 
    3258              :        merge_bb_1:
    3259              : 
    3260              :        epilog_preheader:
    3261              :      EPILOG:
    3262              :        epilog_header_bb:
    3263              :          epilog_body
    3264              :          if (exit_epilog_cond) goto merge_bb_3
    3265              :          else                  goto epilog_header_bb
    3266              : 
    3267              :        merge_bb_3:
    3268              : 
    3269              :    Note this function peels prolog and epilog only if it's necessary,
    3270              :    as well as guards.
    3271              :    This function returns the epilogue loop if a decision was made to vectorize
    3272              :    it, otherwise NULL.
    3273              : 
    3274              :    The analysis resulting in this epilogue loop's loop_vec_info was performed
    3275              :    in the same vect_analyze_loop call as the main loop's.  At that time
    3276              :    vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
    3277              :    vectorization factors than the main loop.  This list is chained in the
    3278              :    loop's loop_vec_info in the 'epilogue_vinfo' member.  When we decide to
    3279              :    vectorize the epilogue loop for a lower vectorization factor, the
    3280              :    loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
    3281              :    This is later used to vectorize the epilogue.
    3282              :    The reason the loop_vec_info needs updating is that it was
    3283              :    constructed based on the original main loop, and the epilogue loop is a
    3284              :    copy of this loop, so all links pointing to statements in the original loop
    3285              :    need updating.  Furthermore, these loop_vec_infos share the
    3286              :    data_reference's records, which will also need to be updated.
    3287              : 
    3288              :    TODO: Guard for prefer_scalar_loop should be emitted along with
    3289              :    versioning conditions if loop versioning is needed.  */
    3290              : 
    3291              : 
    3292              : class loop *
    3293        61803 : vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
    3294              :                  tree *niters_vector, tree *step_vector,
    3295              :                  tree *niters_vector_mult_vf_var, int th,
    3296              :                  bool check_profitability, bool niters_no_overflow,
    3297              :                  tree *advance)
    3298              : {
    3299        61803 :   edge e, guard_e;
    3300        61803 :   tree type = niters ? TREE_TYPE (niters) : sizetype;
    3301        61803 :   tree guard_cond;
    3302        61803 :   basic_block guard_bb, guard_to;
    3303        61803 :   profile_probability prob_prolog, prob_vector, prob_epilog;
    3304        61803 :   int estimated_vf;
    3305        61803 :   int prolog_peeling = 0;
    3306        61803 :   bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
    3307        61803 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    3308              : 
    3309        61803 :   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
    3310        61802 :     prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    3311              : 
    3312        61803 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    3313        61803 :   poly_uint64 bound_epilog = 0;
    3314        61803 :   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    3315        61785 :       && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    3316        32510 :     bound_epilog += vf - 1;
    3317        61803 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    3318          377 :     bound_epilog += 1;
    3319              : 
    3320              :   /* For early breaks the scalar loop needs to execute at most VF times
    3321              :      to find the element that caused the break.  */
    3322        61803 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3323        61803 :       && (LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
    3324          773 :           || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
    3325              :     bound_epilog = vf;
    3326              : 
    3327        61803 :   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
    3328        61803 :   poly_uint64 bound_scalar = bound_epilog;
    3329              : 
    3330        61803 :   if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ())
    3331         8143 :     dump_printf_loc (MSG_NOTE, vect_location,
    3332              :                      "early break does not require epilog.\n");
    3333              : 
    3334        61803 :   if (!prolog_peeling && !epilog_peeling)
    3335              :     return NULL;
    3336              : 
    3337              :   /* Before doing any peeling make sure to reset debug binds outside of
    3338              :      the loop referring to defs not in LC SSA.  */
    3339        32907 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    3340        99993 :   for (unsigned i = 0; i < loop->num_nodes; ++i)
    3341              :     {
    3342        67086 :       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
    3343        67086 :       imm_use_iterator ui;
    3344        67086 :       gimple *use_stmt;
    3345       160029 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    3346        92943 :            gsi_next (&gsi))
    3347              :         {
    3348       387006 :           FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
    3349       221101 :             if (gimple_debug_bind_p (use_stmt)
    3350        19981 :                 && loop != gimple_bb (use_stmt)->loop_father
    3351        19997 :                 && !flow_loop_nested_p (loop,
    3352           16 :                                         gimple_bb (use_stmt)->loop_father))
    3353              :               {
    3354            2 :                 gimple_debug_bind_reset_value (use_stmt);
    3355            2 :                 update_stmt (use_stmt);
    3356        92943 :               }
    3357              :         }
    3358       627538 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    3359       493366 :            gsi_next (&gsi))
    3360              :         {
    3361       493366 :           ssa_op_iter op_iter;
    3362       493366 :           def_operand_p def_p;
    3363       799369 :           FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
    3364      1071907 :             FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
    3365       496523 :               if (gimple_debug_bind_p (use_stmt)
    3366        36622 :                   && loop != gimple_bb (use_stmt)->loop_father
    3367        36649 :                   && !flow_loop_nested_p (loop,
    3368           27 :                                           gimple_bb (use_stmt)->loop_father))
    3369              :                 {
    3370            0 :                   gimple_debug_bind_reset_value (use_stmt);
    3371            0 :                   update_stmt (use_stmt);
    3372       306003 :                 }
    3373              :         }
    3374              :     }
    3375              : 
    3376        32907 :   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
    3377        32907 :   estimated_vf = vect_vf_for_cost (loop_vinfo);
    3378        32907 :   if (estimated_vf == 2)
    3379         6851 :     estimated_vf = 3;
    3380        32907 :   prob_prolog = prob_epilog = profile_probability::guessed_always ()
    3381        32907 :                         .apply_scale (estimated_vf - 1, estimated_vf);
    3382              : 
    3383        32907 :   class loop *prolog = NULL, *epilog = NULL;
    3384        32907 :   class loop *first_loop = loop;
    3385        32907 :   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    3386              : 
    3387              :   /* SSA form needs to be up-to-date since we are going to manually
    3388              :      update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
    3389              :      update SSA state after that, so we have to make sure to not lose any
    3390              :      pending update needs.  */
    3391        32907 :   gcc_assert (!need_ssa_update_p (cfun));
    3392              : 
    3393              :   /* If we're vectorizing an epilogue loop, we have ensured that the
    3394              :      virtual operand is in SSA form throughout the vectorized main loop.
    3395              :      Normally it is possible to trace the updated
    3396              :      vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
    3397              :      back to scalar-stmt vuses, meaning that the effect of the SSA update
    3398              :      remains local to the main loop.  However, there are rare cases in
    3399              :      which the vectorized loop should have vdefs even when the original scalar
    3400              :      loop didn't.  For example, vectorizing a load with IFN_LOAD_LANES
    3401              :      introduces clobbers of the temporary vector array, which in turn
    3402              :      needs new vdefs.  If the scalar loop doesn't write to memory, these
    3403              :      new vdefs will be the only ones in the vector loop.
    3404              :      We are currently deferring updating virtual SSA form and creating
    3405              :      of a virtual PHI for this case so we do not have to make sure the
    3406              :      newly introduced virtual def is in LCSSA form.  */
    3407              : 
    3408        32907 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
    3409              :     {
    3410        12420 :       gcc_assert (!adjust_vec.exists ());
    3411        12420 :       adjust_vec.create (32);
    3412              :     }
    3413        32907 :   initialize_original_copy_tables ();
    3414              : 
    3415              :   /* Record the anchor bb at which the guard should be placed if the scalar
    3416              :      loop might be preferred.  */
    3417        32907 :   basic_block anchor = loop_preheader_edge (loop)->src;
    3418              : 
    3419              :   /* Generate the number of iterations for the prolog loop.  We do this here
    3420              :      so that we can also get the upper bound on the number of iterations.  */
    3421        32907 :   tree niters_prolog;
    3422        32907 :   poly_int64 bound_prolog = 0;
    3423        32907 :   if (prolog_peeling)
    3424              :     {
    3425          430 :       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
    3426              :                                                    &bound_prolog);
    3427              :       /* If algonment peeling is known, we will always execute prolog.  */
    3428          430 :       if (TREE_CODE (niters_prolog) == INTEGER_CST)
    3429          226 :         prob_prolog = profile_probability::always ();
    3430              :     }
    3431              :   else
    3432        32477 :     niters_prolog = build_int_cst (type, 0);
    3433              : 
    3434        32907 :   loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
    3435        32907 :   tree niters_vector_mult_vf = NULL_TREE;
    3436              :   /* Saving NITERs before the loop, as this may be changed by prologue.  */
    3437        32907 :   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
    3438        32907 :   edge update_e = NULL, skip_e = NULL;
    3439        32907 :   unsigned int lowest_vf = constant_lower_bound (vf);
    3440              :   /* Prolog loop may be skipped.  */
    3441        32907 :   bool skip_prolog = (prolog_peeling != 0);
    3442              :   /* Skip this loop to epilog when there are not enough iterations to enter this
    3443              :      vectorized loop.  If true we should perform runtime checks on the NITERS
    3444              :      to check whether we should skip the current vectorized loop.  If we know
    3445              :      the number of scalar iterations we may choose to add a runtime check if
    3446              :      this number "maybe" smaller than the number of iterations required
    3447              :      when we know the number of scalar iterations may potentially
    3448              :      be smaller than the number of iterations required to enter this loop, for
    3449              :      this we use the upper bounds on the prolog and epilog peeling.  When we
    3450              :      don't know the number of iterations and don't require versioning it is
    3451              :      because we have asserted that there are enough scalar iterations to enter
    3452              :      the main loop, so this skip is not necessary.  When we are versioning then
    3453              :      we only add such a skip if we have chosen to vectorize the epilogue.  */
    3454        32907 :   bool skip_vector = false;
    3455        32907 :   if (!uncounted_p)
    3456         7890 :     skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3457        40754 :                    ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
    3458         7890 :                                bound_prolog + bound_epilog)
    3459        24999 :                    : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
    3460           40 :                       || vect_epilogues));
    3461              : 
    3462              :   /* Epilog loop must be executed if the number of iterations for epilog
    3463              :      loop is known at compile time, otherwise we need to add a check at
    3464              :      the end of vector loop and skip to the end of epilog loop.  */
    3465        32907 :   bool skip_epilog = (prolog_peeling < 0
    3466        32703 :                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3467        32907 :                       || !vf.is_constant ());
    3468              :   /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog
    3469              :      loop must be executed.  */
    3470        32907 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
    3471        32530 :       || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3472         1208 :     skip_epilog = false;
    3473              : 
    3474        32907 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3475        32907 :   auto_vec<profile_count> original_counts;
    3476        32907 :   basic_block *original_bbs = NULL;
    3477              : 
    3478        32907 :   if (skip_vector)
    3479              :     {
    3480        24959 :       split_edge (loop_preheader_edge (loop));
    3481              : 
    3482        24959 :       if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
    3483              :         {
    3484        22743 :           original_bbs = get_loop_body (loop);
    3485        68644 :           for (unsigned int i = 0; i < loop->num_nodes; i++)
    3486        45901 :             original_counts.safe_push(original_bbs[i]->count);
    3487              :         }
    3488              : 
    3489              :       /* Due to the order in which we peel prolog and epilog, we first
    3490              :          propagate probability to the whole loop.  The purpose is to
    3491              :          avoid adjusting probabilities of both prolog and vector loops
    3492              :          separately.  Note in this case, the probability of epilog loop
    3493              :          needs to be scaled back later.  */
    3494        24959 :       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
    3495        24959 :       if (prob_vector.initialized_p ())
    3496              :         {
    3497        24959 :           scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
    3498        24959 :           scale_loop_profile (loop, prob_vector, -1);
    3499              :         }
    3500              :     }
    3501              : 
    3502        32907 :   if (vect_epilogues)
    3503              :     {
    3504              :       /* Make sure to set the epilogue's epilogue scalar loop, such that we can
    3505              :          use the original scalar loop as remaining epilogue if necessary.  */
    3506         6847 :       LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
    3507         6847 :         = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3508         6847 :       LOOP_VINFO_SCALAR_MAIN_EXIT (epilogue_vinfo)
    3509         6847 :         = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3510              :     }
    3511              : 
    3512        32907 :   if (prolog_peeling)
    3513              :     {
    3514          430 :       e = loop_preheader_edge (loop);
    3515          430 :       edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3516          430 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)
    3517              :                            && (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3518              :                                || uncounted_p));
    3519              : 
    3520              :       /* Peel prolog and put it on preheader edge of loop.  */
    3521          430 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3522          430 :       edge prolog_e = NULL;
    3523          430 :       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
    3524              :                                                        scalar_loop, scalar_e,
    3525              :                                                        e, &prolog_e, true, NULL,
    3526              :                                                        uncounted_p, uncounted_p,
    3527              :                                                        false);
    3528              : 
    3529          430 :       gcc_assert (prolog);
    3530          430 :       prolog->force_vectorize = false;
    3531              : 
    3532              :       /* Assign hierarchical discriminators to distinguish prolog loop.  */
    3533          430 :       gimple *prolog_last = last_nondebug_stmt (prolog->header);
    3534          430 :       location_t prolog_loc
    3535          430 :         = prolog_last ? gimple_location (prolog_last) : UNKNOWN_LOCATION;
    3536          430 :       if (prolog_loc != UNKNOWN_LOCATION)
    3537              :         {
    3538          428 :           unsigned int prolog_copyid = allocate_copyid_base (prolog_loc, 1);
    3539          428 :           assign_discriminators_to_loop (prolog, 0, prolog_copyid);
    3540              :         }
    3541          430 :       first_loop = prolog;
    3542          430 :       reset_original_copy_tables ();
    3543              : 
    3544              :       /* Update the number of iterations for prolog loop.  */
    3545          430 :       tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
    3546          430 :       vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
    3547              :                                step_prolog, NULL_TREE, false);
    3548              : 
    3549              :       /* Skip the prolog loop.  */
    3550          430 :       if (skip_prolog)
    3551              :         {
    3552          430 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3553              :                                     niters_prolog, build_int_cst (type, 0));
    3554          430 :           guard_bb = loop_preheader_edge (prolog)->src;
    3555          430 :           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
    3556          430 :           guard_to = split_edge (loop_preheader_edge (loop));
    3557          430 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3558              :                                            guard_to, guard_bb,
    3559              :                                            prob_prolog.invert (),
    3560              :                                            irred_flag);
    3561         1968 :           for (edge alt_e : get_loop_exit_edges (prolog))
    3562              :             {
    3563          678 :               if (alt_e == prolog_e)
    3564          430 :                 continue;
    3565          248 :               basic_block old_dom
    3566          248 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    3567          248 :               if (flow_bb_inside_loop_p (prolog, old_dom))
    3568              :                 {
    3569          103 :                   auto_vec<basic_block, 8> queue;
    3570          103 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    3571          337 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    3572          234 :                     if (!flow_bb_inside_loop_p (prolog, son))
    3573          131 :                       queue.safe_push (son);
    3574          440 :                   for (auto son : queue)
    3575          131 :                     set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
    3576          103 :                 }
    3577          430 :             }
    3578              : 
    3579          430 :           e = EDGE_PRED (guard_to, 0);
    3580          430 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3581          430 :           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
    3582              : 
    3583          430 :           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
    3584          430 :           scale_loop_profile (prolog, prob_prolog,
    3585          430 :                               estimated_poly_value (bound_prolog) - 1);
    3586              :         }
    3587              : 
    3588              :       /* Update init address of DRs.  */
    3589          430 :       vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
    3590          430 :       if (!uncounted_p)
    3591              :         {
    3592              :           /* Update niters for vector loop.  */
    3593          399 :           LOOP_VINFO_NITERS (loop_vinfo)
    3594          399 :             = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
    3595          399 :           LOOP_VINFO_NITERSM1 (loop_vinfo)
    3596          399 :             = fold_build2 (MINUS_EXPR, type,
    3597              :                            LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
    3598              :         }
    3599          430 :       bool new_var_p = false;
    3600          430 :       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
    3601              :       /* It's guaranteed that vector loop bound before vectorization is at
    3602              :          least VF, so set range information for newly generated var.  */
    3603          430 :       if (new_var_p)
    3604              :         {
    3605          214 :           int_range<1> vr (type,
    3606          428 :                            wi::to_wide (build_int_cst (type, lowest_vf)),
    3607          428 :                            wi::to_wide (TYPE_MAX_VALUE (type)));
    3608          214 :           set_range_info (niters, vr);
    3609          214 :         }
    3610              : 
    3611              :       /* Prolog iterates at most bound_prolog times, latch iterates at
    3612              :          most bound_prolog - 1 times.  */
    3613          430 :       if (bound_prolog.is_constant ())
    3614          430 :         record_niter_bound (prolog, bound_prolog.to_constant () - 1, false,
    3615              :                             true);
    3616          430 :       delete_update_ssa ();
    3617          430 :       adjust_vec_debug_stmts ();
    3618          430 :       scev_reset ();
    3619              :     }
    3620        32907 :   basic_block bb_before_epilog = NULL;
    3621              : 
    3622        32907 :   if (epilog_peeling)
    3623              :     {
    3624        32872 :       e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3625        32872 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
    3626              : 
    3627              :       /* Peel epilog and put it on exit edge of loop.  If we are vectorizing
    3628              :          said epilog then we should use a copy of the main loop as a starting
    3629              :          point.  This loop may have already had some preliminary transformations
    3630              :          to allow for more optimal vectorization, for example if-conversion.
    3631              :          If we are not vectorizing the epilog then we should use the scalar loop
    3632              :          as the transformations mentioned above make less or no sense when not
    3633              :          vectorizing.  */
    3634        32872 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3635        32872 :       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
    3636         6847 :       edge epilog_e = vect_epilogues ? e : scalar_e;
    3637        32872 :       edge new_epilog_e = NULL;
    3638        32872 :       auto_vec<basic_block> doms;
    3639        32872 :       bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
    3640        32872 :       epilog
    3641        32872 :         = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
    3642              :                                                   &new_epilog_e, true, &doms,
    3643              :                                                   uncounted_p, false,
    3644              :                                                   early_break_peel_p);
    3645              : 
    3646        32872 :       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
    3647        32872 :       gcc_assert (epilog);
    3648        32872 :       gcc_assert (new_epilog_e);
    3649        32872 :       epilog->force_vectorize = false;
    3650        32872 :       bb_before_epilog = loop_preheader_edge (epilog)->src;
    3651              : 
    3652              :       /* Assign hierarchical discriminators to distinguish epilog loop.
    3653              :          Only assign if it's a scalar epilog.  If it will be vectorized
    3654              :          (vect_epilogues), discriminators will be assigned.
    3655              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    3656        32872 :       if (!vect_epilogues)
    3657              :         {
    3658        26025 :           gimple *epilog_last = last_nondebug_stmt (epilog->header);
    3659        26025 :           location_t epilog_loc
    3660        26025 :             = epilog_last ? gimple_location (epilog_last) : UNKNOWN_LOCATION;
    3661        25996 :           if (epilog_loc != UNKNOWN_LOCATION)
    3662              :             {
    3663        21173 :               unsigned int epilog_copyid = allocate_copyid_base (epilog_loc, 1);
    3664        21173 :               assign_discriminators_to_loop (epilog, 0, epilog_copyid);
    3665              :             }
    3666              :         }
    3667              : 
    3668              :       /* Scalar version loop may be preferred.  In this case, add guard
    3669              :          and skip to epilog.  Note this only happens when the number of
    3670              :          iterations of loop is unknown at compile time, otherwise this
    3671              :          won't be vectorized.  */
    3672        32872 :       if (skip_vector)
    3673              :         {
    3674              :           /* Additional epilogue iteration is peeled if gap exists.  */
    3675        24959 :           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
    3676        24959 :                                                 bound_prolog, bound_epilog,
    3677              :                                                 th, &bound_scalar,
    3678              :                                                 check_profitability);
    3679              :           /* Build guard against NITERSM1 since NITERS may overflow.  */
    3680        24959 :           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
    3681        24959 :           guard_bb = anchor;
    3682        24959 :           guard_to = split_edge (loop_preheader_edge (epilog));
    3683        24959 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3684              :                                            guard_to, guard_bb,
    3685              :                                            prob_vector.invert (),
    3686              :                                            irred_flag);
    3687        24959 :           skip_e = guard_e;
    3688        24959 :           e = EDGE_PRED (guard_to, 0);
    3689        24959 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3690              : 
    3691              :           /* Handle any remaining dominator updates needed after
    3692              :              inserting the loop skip edge above.  */
    3693        24959 :           if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3694          278 :               && prolog_peeling)
    3695              :             {
    3696              :               /* Adding a skip edge to skip a loop with multiple exits
    3697              :                  means the dominator of the join blocks for all exits shifts
    3698              :                  from the prolog skip guard to the loop skip guard.  */
    3699          155 :               auto prolog_skip_bb
    3700          155 :                 = single_pred (loop_preheader_edge (prolog)->src);
    3701          155 :               auto needs_update
    3702          155 :                 = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb);
    3703              : 
    3704              :               /* Update everything except for the immediate children of
    3705              :                  the prolog skip block (the prolog and vector preheaders).
    3706              :                  Those should remain dominated by the prolog skip block itself,
    3707              :                  since the loop guard edge goes to the epilogue.  */
    3708          834 :               for (auto bb : needs_update)
    3709          369 :                 if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest
    3710          369 :                     && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest)
    3711           59 :                   set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb);
    3712          155 :             }
    3713              : 
    3714        24959 :           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
    3715              : 
    3716              :           /* Simply propagate profile info from guard_bb to guard_to which is
    3717              :              a merge point of control flow.  */
    3718        24959 :           profile_count old_count = guard_to->count;
    3719        24959 :           guard_to->count = guard_bb->count;
    3720              : 
    3721              :           /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
    3722        24959 :           if (vect_epilogues || scalar_loop == NULL)
    3723              :             {
    3724        22743 :               gcc_assert(epilog->num_nodes == loop->num_nodes);
    3725        22743 :               basic_block *bbs = get_loop_body (epilog);
    3726        68644 :               for (unsigned int i = 0; i < epilog->num_nodes; i++)
    3727              :                 {
    3728        45901 :                   gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]);
    3729        45901 :                   bbs[i]->count = original_counts[i];
    3730              :                 }
    3731        22743 :               free (bbs);
    3732        22743 :               free (original_bbs);
    3733              :             }
    3734         2216 :           else if (old_count.nonzero_p ())
    3735         2216 :             scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
    3736              : 
    3737              :           /* Only need to handle basic block before epilog loop if it's not
    3738              :              the guard_bb, which is the case when skip_vector is true.  */
    3739        24959 :           if (guard_bb != bb_before_epilog && single_pred_p (bb_before_epilog))
    3740        24821 :             bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
    3741        24959 :           bb_before_epilog = loop_preheader_edge (epilog)->src;
    3742              :         }
    3743              : 
    3744        32872 :       if (!uncounted_p)
    3745              :         {
    3746              :           /* If loop is peeled for non-zero constant times, now niters refers to
    3747              :              orig_niters - prolog_peeling, it won't overflow even the
    3748              :              orig_niters overflows.  */
    3749        32829 :           niters_no_overflow |= (prolog_peeling > 0);
    3750        32829 :           vect_gen_vector_loop_niters (loop_vinfo, niters,
    3751              :                                        niters_vector, step_vector,
    3752              :                                        niters_no_overflow);
    3753        32829 :           if (!integer_onep (*step_vector))
    3754              :             {
    3755              :               /* On exit from the loop we will have an easy way of calculating
    3756              :                  NITERS_VECTOR / STEP * STEP.  Install a dummy definition
    3757              :                  until then.  */
    3758            0 :               niters_vector_mult_vf
    3759            0 :                 = make_ssa_name (TREE_TYPE (*niters_vector));
    3760            0 :               edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3761            0 :               gimple_stmt_iterator loop_cond_gsi
    3762            0 :                 = gsi_after_labels (exit_e->dest);
    3763              : 
    3764            0 :               gcall *tmp = gimple_build_call_internal (IFN_VARYING, 0);
    3765            0 :               gimple_call_set_lhs (tmp, niters_vector_mult_vf);
    3766            0 :               gsi_insert_before (&loop_cond_gsi, tmp, GSI_SAME_STMT);
    3767            0 :               *niters_vector_mult_vf_var = niters_vector_mult_vf;
    3768              :             }
    3769              :           else
    3770        32829 :             vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
    3771              :                                                  &niters_vector_mult_vf);
    3772              :           /* Update IVs of original loop as if they were advanced by
    3773              :              niters_vector_mult_vf steps.  */
    3774        32829 :           gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
    3775        32829 :           update_e = skip_vector ? e : loop_preheader_edge (epilog);
    3776              :         }
    3777        32872 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3778          827 :         update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest);
    3779              : 
    3780              :       /* If we have a peeled vector iteration we will never skip the epilog loop
    3781              :          and we can simplify the cfg a lot by not doing the edge split.  */
    3782        32872 :       if (skip_epilog
    3783        32872 :           || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3784          827 :               && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
    3785              :         {
    3786        25274 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3787              :                                     niters, niters_vector_mult_vf);
    3788              : 
    3789        25274 :           guard_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3790        25274 :           edge epilog_e = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3791        25274 :           guard_to = epilog_e->dest;
    3792        25774 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
    3793              :                                            skip_vector ? anchor : guard_bb,
    3794              :                                            prob_epilog.invert (),
    3795              :                                            irred_flag);
    3796              : 
    3797        25274 :           doms.safe_push (guard_to);
    3798        25274 :           if (vect_epilogues)
    3799         5505 :             epilogue_vinfo->skip_this_loop_edge = guard_e;
    3800        25274 :           edge main_iv = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3801        25274 :           gphi_iterator gsi2 = gsi_start_phis (main_iv->dest);
    3802        25274 :           for (gphi_iterator gsi = gsi_start_phis (guard_to);
    3803        61810 :                !gsi_end_p (gsi); gsi_next (&gsi))
    3804              :             {
    3805              :               /* We are expecting all of the PHIs we have on epilog_e
    3806              :                  to be also on the main loop exit.  But sometimes
    3807              :                  a stray virtual definition can appear at epilog_e
    3808              :                  which we can then take as the same on all exits,
    3809              :                  we've removed the LC SSA PHI on the main exit before
    3810              :                  so we wouldn't need to create a loop PHI for it.  */
    3811        36536 :               if (virtual_operand_p (gimple_phi_result (*gsi))
    3812        36536 :                   && (gsi_end_p (gsi2)
    3813        36614 :                       || !virtual_operand_p (gimple_phi_result (*gsi2))))
    3814          172 :                 add_phi_arg (*gsi,
    3815           86 :                              gimple_phi_arg_def_from_edge (*gsi, epilog_e),
    3816              :                              guard_e, UNKNOWN_LOCATION);
    3817              :               else
    3818              :                 {
    3819        36450 :                   add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
    3820              :                                UNKNOWN_LOCATION);
    3821        36450 :                   gsi_next (&gsi2);
    3822              :                 }
    3823              :             }
    3824              : 
    3825              :           /* Only need to handle basic block before epilog loop if it's not
    3826              :              the guard_bb, which is the case when skip_vector is true.  */
    3827        25274 :           if (guard_bb != bb_before_epilog)
    3828              :             {
    3829        25228 :               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
    3830              : 
    3831        25228 :               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
    3832              :             }
    3833        25274 :           scale_loop_profile (epilog, prob_epilog, -1);
    3834              :         }
    3835              : 
    3836              :       /* If we have a peeled vector iteration, all exits are the same, leave it
    3837              :          and so the main exit needs to be treated the same as the alternative
    3838              :          exits in that we leave their updates to vectorizable_live_operations.
    3839              :          */
    3840        32872 :       tree vector_iters_vf = niters_vector_mult_vf;
    3841        32872 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3842              :         {
    3843          827 :           tree tmp_niters_vf
    3844          827 :             = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
    3845          827 :           gcall *tmp_call = gimple_build_call_internal (IFN_VARYING, 0);
    3846          827 :           gimple_call_set_lhs (tmp_call, tmp_niters_vf);
    3847          827 :           auto header_gsi = gsi_after_labels (loop->header);
    3848          827 :           gsi_insert_after (&header_gsi, tmp_call, GSI_SAME_STMT);
    3849              : 
    3850           43 :           if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
    3851          913 :                 && get_loop_exit_edges (loop).length () == 1)
    3852          841 :               && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
    3853              :           {
    3854          615 :             basic_block exit_bb = NULL;
    3855          615 :             edge update_e = NULL;
    3856              : 
    3857              :             /* Identify the early exit merge block.  I wish we had stored
    3858              :                this.  */
    3859         1845 :             for (auto e : get_loop_exit_edges (loop))
    3860          615 :               if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo))
    3861              :                 {
    3862          615 :                   exit_bb = e->dest;
    3863          615 :                   update_e = single_succ_edge (exit_bb);
    3864          615 :                   break;
    3865          615 :                 }
    3866          615 :             vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
    3867              :                                               update_e, true);
    3868              :           }
    3869          827 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3870              :             vector_iters_vf = tmp_niters_vf;
    3871              : 
    3872          827 :           LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
    3873              :         }
    3874              : 
    3875        32872 :         bool recalculate_peel_niters_init
    3876        32872 :           = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
    3877        32872 :         vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
    3878              :                                           update_e,
    3879              :                                           recalculate_peel_niters_init);
    3880              : 
    3881              :       /* Recalculate the dominators after adding the guard edge.  */
    3882        32872 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3883          827 :         iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    3884              : 
    3885              :       /* When we do not have a loop-around edge to the epilog we know
    3886              :          the vector loop covered at least VF scalar iterations unless
    3887              :          we have early breaks.
    3888              :          Update any known upper bound with this knowledge.  */
    3889        32872 :       if (! skip_vector
    3890         7913 :           && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3891              :         {
    3892         7364 :           if (epilog->any_upper_bound)
    3893         7364 :             epilog->nb_iterations_upper_bound -= lowest_vf;
    3894         7364 :           if (epilog->any_likely_upper_bound)
    3895         7364 :             epilog->nb_iterations_likely_upper_bound -= lowest_vf;
    3896         7364 :           if (epilog->any_estimate)
    3897         7362 :             epilog->nb_iterations_estimate -= lowest_vf;
    3898              :         }
    3899              : 
    3900        32872 :       unsigned HOST_WIDE_INT bound;
    3901        32872 :       if (bound_scalar.is_constant (&bound))
    3902              :         {
    3903        32872 :           gcc_assert (bound != 0);
    3904              :           /* Adjust the upper bound by the extra peeled vector iteration if we
    3905              :              are an epilogue of an peeled vect loop and not VLA.  For VLA the
    3906              :              loop bounds are unknown.  */
    3907        65725 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3908        32872 :               && vf.is_constant ())
    3909           62 :             bound += vf.to_constant ();
    3910              :           /* -1 to convert loop iterations to latch iterations.  */
    3911        32872 :           record_niter_bound (epilog, bound - 1, false, true);
    3912        32872 :           scale_loop_profile (epilog, profile_probability::always (),
    3913              :                               bound - 1);
    3914              :         }
    3915              : 
    3916        32872 :       delete_update_ssa ();
    3917        32872 :       adjust_vec_debug_stmts ();
    3918        32872 :       scev_reset ();
    3919        32872 :     }
    3920              : 
    3921        32907 :   if (vect_epilogues)
    3922              :     {
    3923         6847 :       epilog->aux = epilogue_vinfo;
    3924         6847 :       LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
    3925         6847 :       LOOP_VINFO_MAIN_EXIT (epilogue_vinfo)
    3926         6847 :         = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3927              : 
    3928         6847 :       loop_constraint_clear (epilog, LOOP_C_INFINITE);
    3929              : 
    3930              :       /* We now must calculate the number of NITERS performed by the previous
    3931              :          loop and EPILOGUE_NITERS to be performed by the epilogue.  */
    3932         6847 :       tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
    3933              :                                  niters_prolog, niters_vector_mult_vf);
    3934              : 
    3935              :       /* If skip_vector we may skip the previous loop, we insert a phi-node to
    3936              :          determine whether we are coming from the previous vectorized loop
    3937              :          using the update_e edge or the skip_vector basic block using the
    3938              :          skip_e edge.  */
    3939         6847 :       if (skip_vector)
    3940              :         {
    3941         5576 :           gcc_assert (update_e != NULL && skip_e != NULL);
    3942         5576 :           gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
    3943              :                                            update_e->dest);
    3944         5576 :           tree new_ssa = make_ssa_name (TREE_TYPE (niters));
    3945         5576 :           gimple *stmt = gimple_build_assign (new_ssa, niters);
    3946         5576 :           gimple_stmt_iterator gsi;
    3947         5576 :           if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
    3948         5576 :               && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
    3949              :             {
    3950         5576 :               gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
    3951         5576 :               gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    3952              :             }
    3953              :           else
    3954              :             {
    3955            0 :               gsi = gsi_last_bb (update_e->src);
    3956            0 :               gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
    3957              :             }
    3958              : 
    3959         5576 :           niters = new_ssa;
    3960         5576 :           add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
    3961         5576 :           add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
    3962              :                        UNKNOWN_LOCATION);
    3963         5576 :           niters = PHI_RESULT (new_phi);
    3964         5576 :           epilogue_vinfo->main_loop_edge = update_e;
    3965         5576 :           epilogue_vinfo->skip_main_loop_edge = skip_e;
    3966              :         }
    3967              : 
    3968              :       /* Set ADVANCE to the number of iterations performed by the previous
    3969              :          loop and its prologue.  */
    3970         6847 :       *advance = niters;
    3971              : 
    3972              :       /* Subtract the number of iterations performed by the vectorized loop
    3973              :          from the number of total iterations.  */
    3974         6847 :       tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
    3975              :                                           before_loop_niters,
    3976              :                                           niters);
    3977              : 
    3978         6847 :       LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
    3979         6847 :       LOOP_VINFO_NITERSM1 (epilogue_vinfo)
    3980         6847 :         = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
    3981              :                        epilogue_niters,
    3982              :                        build_one_cst (TREE_TYPE (epilogue_niters)));
    3983              :     }
    3984              : 
    3985        32907 :   adjust_vec.release ();
    3986        32907 :   free_original_copy_tables ();
    3987              : 
    3988        32907 :   return vect_epilogues ? epilog : NULL;
    3989        32907 : }
    3990              : 
    3991              : /* Function vect_create_cond_for_niters_checks.
    3992              : 
    3993              :    Create a conditional expression that represents the run-time checks for
    3994              :    loop's niter.  The loop is guaranteed to terminate if the run-time
    3995              :    checks hold.
    3996              : 
    3997              :    Input:
    3998              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3999              :                 with logical AND operation.  If it is NULL, then the function
    4000              :                 is used to return the number of alias checks.
    4001              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    4002              :                 to be checked.
    4003              : 
    4004              :    Output:
    4005              :    COND_EXPR - conditional expression.
    4006              : 
    4007              :    The returned COND_EXPR is the conditional expression to be used in the
    4008              :    if statement that controls which version of the loop gets executed at
    4009              :    runtime.  */
    4010              : 
    4011              : static void
    4012          376 : vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
    4013              : {
    4014          376 :   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
    4015              : 
    4016          376 :   if (*cond_expr)
    4017          376 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4018              :                               *cond_expr, part_cond_expr);
    4019              :   else
    4020            0 :     *cond_expr = part_cond_expr;
    4021          376 : }
    4022              : 
    4023              : /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    4024              :    and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
    4025              : 
    4026              : static void
    4027          287 : chain_cond_expr (tree *cond_expr, tree part_cond_expr)
    4028              : {
    4029          287 :   if (*cond_expr)
    4030          283 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4031              :                               *cond_expr, part_cond_expr);
    4032              :   else
    4033            4 :     *cond_expr = part_cond_expr;
    4034          287 : }
    4035              : 
    4036              : /* Function vect_create_cond_for_align_checks.
    4037              : 
    4038              :    Create a conditional expression that represents the alignment checks for
    4039              :    all of data references (array element references) whose alignment must be
    4040              :    checked at runtime.
    4041              : 
    4042              :    Input:
    4043              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4044              :                 with logical AND operation.
    4045              :    LOOP_VINFO - three fields of the loop information are used.
    4046              :                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
    4047              :                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
    4048              :                 LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT indicates which check applies.
    4049              : 
    4050              :    Output:
    4051              :    COND_EXPR_STMT_LIST - statements needed to construct the conditional
    4052              :                          expression.
    4053              :    The returned value is the conditional expression to be used in the if
    4054              :    statement that controls which version of the loop gets executed at runtime.
    4055              : 
    4056              :    Based on the boolean value of LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT, we decide
    4057              :    which type of check should be applied and create two different expressions
    4058              :    accordingly.
    4059              :      1) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is false, we see if all data refs
    4060              :         to be checked are already aligned to an alignment boundary.  We create
    4061              :         an expression of "(a_1 | a_2 | a_3 | ... | a_n) & mask", where "a_i" is
    4062              :         the address of i'th data reference.
    4063              :      2) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we see if all data refs
    4064              :         can be aligned to a boundary after a certain amount of peeling, in other
    4065              :         words, their addresses have the same bottom bits according to the mask.
    4066              :         We create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask",
    4067              :         where "a_i" is the address of i'th data reference.
    4068              : 
    4069              :    Both algorithms make two assumptions:
    4070              :      1) The number of bytes "n" in a vector is a power of 2.
    4071              :      2) An address "a" is aligned if a%n is zero and that this
    4072              :         test can be done as a&(n-1) == 0.  For example, for 16
    4073              :         byte vectors the test is a&0xf == 0.  */
    4074              : 
    4075              : static void
    4076           45 : vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
    4077              :                                    tree *cond_expr,
    4078              :                                    gimple_seq *cond_expr_stmt_list)
    4079              : {
    4080           45 :   const vec<stmt_vec_info> &may_misalign_stmts
    4081              :     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
    4082           45 :   stmt_vec_info stmt_info;
    4083           45 :   poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
    4084           45 :   tree mask_cst;
    4085           45 :   unsigned int i;
    4086           45 :   tree int_ptrsize_type;
    4087           45 :   char tmp_name[30];
    4088           45 :   tree or_tmp_name = NULL_TREE;
    4089           45 :   tree prev_addr_tmp_name = NULL_TREE;
    4090           45 :   tree and_tmp_name;
    4091           45 :   gimple *and_stmt;
    4092           45 :   tree ptrsize_zero;
    4093           45 :   tree part_cond_expr;
    4094              : 
    4095           45 :   gcc_assert (known_ne (mask, 0U));
    4096              : 
    4097           45 :   int_ptrsize_type = signed_type_for (ptr_type_node);
    4098              : 
    4099              :   /* If LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we should have at least two
    4100              :      datarefs to check the mutual alignment.  */
    4101           45 :   gcc_assert (may_misalign_stmts.length () > 1
    4102              :               || !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo));
    4103              : 
    4104          109 :   FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    4105              :     {
    4106           64 :       gimple_seq new_stmt_list = NULL;
    4107           64 :       tree addr_base;
    4108           64 :       tree addr_tmp_name;
    4109           64 :       tree xor_tmp_name;
    4110           64 :       tree new_or_tmp_name;
    4111           64 :       gimple *addr_stmt, *or_stmt, *xor_stmt;
    4112           64 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4113           64 :       bool negative = tree_int_cst_compare
    4114           64 :         (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
    4115           64 :       tree offset = negative
    4116           64 :         ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    4117              :                     * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
    4118           64 :         : size_zero_node;
    4119              : 
    4120              :       /* create: addr_tmp = (int)(address_of_first_vector) */
    4121           64 :       addr_base =
    4122           64 :         vect_create_addr_base_for_vector_ref (loop_vinfo,
    4123              :                                               stmt_info, &new_stmt_list,
    4124              :                                               offset);
    4125           64 :       if (new_stmt_list != NULL)
    4126           14 :         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
    4127              : 
    4128           64 :       sprintf (tmp_name, "addr2int%d", i);
    4129           64 :       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
    4130           64 :       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
    4131           64 :       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
    4132              : 
    4133           64 :       if (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo))
    4134              :         {
    4135              :           /* Create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask"
    4136              :              to check mutual alignment.  */
    4137           36 :           if (prev_addr_tmp_name != NULL_TREE)
    4138              :             {
    4139           18 :               sprintf (tmp_name, "xorptrs%d_%d", i - 1, i);
    4140           18 :               xor_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4141              :                                                  tmp_name);
    4142           18 :               xor_stmt = gimple_build_assign (xor_tmp_name, BIT_XOR_EXPR,
    4143              :                                               prev_addr_tmp_name,
    4144              :                                               addr_tmp_name);
    4145           18 :               gimple_seq_add_stmt (cond_expr_stmt_list, xor_stmt);
    4146           18 :               if (or_tmp_name == NULL_TREE)
    4147              :                 {
    4148              :                   /* Create the 1st XOR when the 2nd data ref is seen.  */
    4149              :                   or_tmp_name = xor_tmp_name;
    4150              :                 }
    4151              :               else
    4152              :                 {
    4153              :                   /* Create: or_tmp = or_tmp | new_xor_tmp.  */
    4154            0 :                   sprintf (tmp_name, "orxors%d", i - 1);
    4155            0 :                   new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4156              :                                                         tmp_name);
    4157            0 :                   or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
    4158              :                                                  or_tmp_name, xor_tmp_name);
    4159            0 :                   gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
    4160            0 :                   or_tmp_name = new_or_tmp_name;
    4161              :                 }
    4162              :             }
    4163              :           prev_addr_tmp_name = addr_tmp_name;
    4164              :         }
    4165              :       else
    4166              :         {
    4167              :           /* Create: "(a_1 | a_2 | a_3 | ... | a_n) & mask" to check if all
    4168              :              addresses are already aligned.  */
    4169           28 :           if (or_tmp_name != NULL_TREE)
    4170              :             {
    4171              :               /* Create: or_tmp = or_tmp | addr_tmp.  */
    4172            1 :               sprintf (tmp_name, "orptrs%d", i);
    4173            1 :               new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4174              :                                                     tmp_name);
    4175            1 :               or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
    4176              :                                              or_tmp_name, addr_tmp_name);
    4177            1 :               gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
    4178            1 :               or_tmp_name = new_or_tmp_name;
    4179              :             }
    4180              :           else
    4181              :             or_tmp_name = addr_tmp_name;
    4182              :         }
    4183              : 
    4184              :     } /* end for i */
    4185              : 
    4186           45 :   mask_cst = build_int_cst (int_ptrsize_type, mask);
    4187              : 
    4188              :   /* create: and_tmp = or_tmp & mask  */
    4189           45 :   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
    4190              : 
    4191           45 :   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
    4192              :                                   or_tmp_name, mask_cst);
    4193           45 :   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
    4194              : 
    4195              :   /* Make and_tmp the left operand of the conditional test against zero.
    4196              :      if and_tmp has a nonzero bit then some address is unaligned.  */
    4197           45 :   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
    4198           45 :   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    4199              :                                 and_tmp_name, ptrsize_zero);
    4200           45 :   chain_cond_expr (cond_expr, part_cond_expr);
    4201           45 : }
    4202              : 
    4203              : /* Function vect_create_cond_for_vla_spec_read.
    4204              : 
    4205              :    Create a conditional expression that represents the run-time checks with
    4206              :    max speculative read amount in VLA modes.  We check two things:
    4207              :      1) if the max speculative read amount exceeds the min page size
    4208              :      2) if the VF is power-of-2 - done by checking the max read amount instead
    4209              : 
    4210              :    Input:
    4211              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4212              :                 with logical AND operation.
    4213              :    LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
    4214              :                 possible speculative read amount in VLA modes.
    4215              : 
    4216              :    Output:
    4217              :    COND_EXPR - conditional expression.
    4218              : 
    4219              :    The returned COND_EXPR is the conditional expression to be used in the
    4220              :    if statement that controls which version of the loop gets executed at
    4221              :    runtime.  */
    4222              : 
    4223              : static void
    4224            0 : vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
    4225              : {
    4226            0 :   poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
    4227            0 :   tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
    4228              : 
    4229              :   /* Both the read amount and the VF must be variants, and the read amount must
    4230              :      be a constant power-of-2 multiple of the VF.  */
    4231            0 :   unsigned HOST_WIDE_INT multiple;
    4232            0 :   gcc_assert (!read_amount_poly.is_constant ()
    4233              :               && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    4234              :               && constant_multiple_p (read_amount_poly,
    4235              :                                       LOOP_VINFO_VECT_FACTOR (loop_vinfo),
    4236              :                                       &multiple)
    4237              :               && pow2p_hwi (multiple));
    4238              : 
    4239              :   tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
    4240              :   tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
    4241              :   tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
    4242              :                                          (unsigned long) param_min_pagesize);
    4243              : 
    4244              :   /* Create an expression of "amount & (amount - 1) == 0".  */
    4245              :   tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
    4246              :                                 amount, cst_ul_one);
    4247              :   tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
    4248              :                                       amount, amount_m1);
    4249              :   tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    4250              :                                        amount_and_expr, cst_ul_zero);
    4251              :   chain_cond_expr (cond_expr, powof2_cond_expr);
    4252              : 
    4253              :   /* Create an expression of "amount <= cst_ul_pagesize".  */
    4254              :   tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
    4255              :                                          amount, cst_ul_pagesize);
    4256              :   chain_cond_expr (cond_expr, pagesize_cond_expr);
    4257              : }
    4258              : 
    4259              : /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
    4260              :    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
    4261              :    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    4262              :    and this new condition are true.  Treat a null *COND_EXPR as "true".  */
    4263              : 
    4264              : static void
    4265         3291 : vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
    4266              : {
    4267         3291 :   const vec<vec_object_pair> &pairs
    4268              :     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    4269         3291 :   unsigned int i;
    4270         3291 :   vec_object_pair *pair;
    4271         3303 :   FOR_EACH_VEC_ELT (pairs, i, pair)
    4272              :     {
    4273           12 :       tree addr1 = build_fold_addr_expr (pair->first);
    4274           12 :       tree addr2 = build_fold_addr_expr (pair->second);
    4275           12 :       tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
    4276              :                                          addr1, addr2);
    4277           12 :       chain_cond_expr (cond_expr, part_cond_expr);
    4278              :     }
    4279         3291 : }
    4280              : 
    4281              : /* Create an expression that is true when all lower-bound conditions for
    4282              :    the vectorized loop are met.  Chain this condition with *COND_EXPR.  */
    4283              : 
    4284              : static void
    4285         3291 : vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
    4286              : {
    4287         3291 :   const vec<vec_lower_bound> &lower_bounds
    4288              :     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    4289         3521 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    4290              :     {
    4291          230 :       tree expr = lower_bounds[i].expr;
    4292          230 :       tree type = unsigned_type_for (TREE_TYPE (expr));
    4293          230 :       expr = fold_convert (type, expr);
    4294          230 :       poly_uint64 bound = lower_bounds[i].min_value;
    4295          230 :       if (!lower_bounds[i].unsigned_p)
    4296              :         {
    4297           72 :           expr = fold_build2 (PLUS_EXPR, type, expr,
    4298              :                               build_int_cstu (type, bound - 1));
    4299           72 :           bound += bound - 1;
    4300              :         }
    4301          230 :       tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
    4302              :                                          build_int_cstu (type, bound));
    4303          230 :       chain_cond_expr (cond_expr, part_cond_expr);
    4304              :     }
    4305         3291 : }
    4306              : 
    4307              : /* Function vect_create_cond_for_alias_checks.
    4308              : 
    4309              :    Create a conditional expression that represents the run-time checks for
    4310              :    overlapping of address ranges represented by a list of data references
    4311              :    relations passed as input.
    4312              : 
    4313              :    Input:
    4314              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4315              :                 with logical AND operation.  If it is NULL, then the function
    4316              :                 is used to return the number of alias checks.
    4317              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    4318              :                 to be checked.
    4319              : 
    4320              :    Output:
    4321              :    COND_EXPR - conditional expression.
    4322              : 
    4323              :    The returned COND_EXPR is the conditional expression to be used in the if
    4324              :    statement that controls which version of the loop gets executed at runtime.
    4325              : */
    4326              : 
    4327              : void
    4328         3291 : vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
    4329              : {
    4330         3291 :   const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
    4331              :     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    4332              : 
    4333         3291 :   if (comp_alias_ddrs.is_empty ())
    4334              :     return;
    4335              : 
    4336         3180 :   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
    4337              :                                &comp_alias_ddrs, cond_expr);
    4338         3180 :   if (dump_enabled_p ())
    4339         1284 :     dump_printf_loc (MSG_NOTE, vect_location,
    4340              :                      "created %u versioning for alias checks.\n",
    4341              :                      comp_alias_ddrs.length ());
    4342              : }
    4343              : 
    4344              : 
    4345              : /* Function vect_loop_versioning.
    4346              : 
    4347              :    If the loop has data references that may or may not be aligned or/and
    4348              :    has data reference relations whose independence was not proven then
    4349              :    two versions of the loop need to be generated, one which is vectorized
    4350              :    and one which isn't.  A test is then generated to control which of the
    4351              :    loops is executed.  The test checks for the alignment of all of the
    4352              :    data references that may or may not be aligned.  An additional
    4353              :    sequence of runtime tests is generated for each pairs of DDRs whose
    4354              :    independence was not proven.  The vectorized version of loop is
    4355              :    executed only if both alias and alignment tests are passed.
    4356              : 
    4357              :    The test generated to check which version of loop is executed
    4358              :    is modified to also check for profitability as indicated by the
    4359              :    cost model threshold TH.
    4360              : 
    4361              :    The versioning precondition(s) are placed in *COND_EXPR and
    4362              :    *COND_EXPR_STMT_LIST.  */
    4363              : 
    4364              : class loop *
    4365         3749 : vect_loop_versioning (loop_vec_info loop_vinfo,
    4366              :                       gimple *loop_vectorized_call)
    4367              : {
    4368         3749 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
    4369         3749 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    4370         3749 :   basic_block condition_bb;
    4371         3749 :   gphi_iterator gsi;
    4372         3749 :   gimple_stmt_iterator cond_exp_gsi;
    4373         3749 :   basic_block merge_bb;
    4374         3749 :   basic_block new_exit_bb;
    4375         3749 :   edge new_exit_e, e;
    4376         3749 :   gphi *orig_phi, *new_phi;
    4377         3749 :   tree cond_expr = NULL_TREE;
    4378         3749 :   gimple_seq cond_expr_stmt_list = NULL;
    4379         3749 :   tree arg;
    4380         3749 :   profile_probability prob = profile_probability::likely ();
    4381         3749 :   gimple_seq gimplify_stmt_list = NULL;
    4382         3749 :   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
    4383         3749 :   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
    4384         3749 :   bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
    4385         3749 :   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
    4386         3749 :   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
    4387         3749 :   poly_uint64 versioning_threshold
    4388              :     = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
    4389         3749 :   tree version_simd_if_cond
    4390              :     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
    4391         3749 :   unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
    4392         3749 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    4393              : 
    4394         3749 :   if (!uncounted_p && vect_apply_runtime_profitability_check_p (loop_vinfo)
    4395              :       && !ordered_p (th, versioning_threshold))
    4396              :     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4397              :                              build_int_cst (TREE_TYPE (scalar_loop_iters),
    4398              :                                             th - 1));
    4399         3749 :   if (!uncounted_p && maybe_ne (versioning_threshold, 0U))
    4400              :     {
    4401         3736 :       tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4402              :                                build_int_cst (TREE_TYPE (scalar_loop_iters),
    4403              :                                               versioning_threshold - 1));
    4404         3736 :       if (cond_expr)
    4405            0 :         cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
    4406              :                                  expr, cond_expr);
    4407              :       else
    4408         3736 :         cond_expr = expr;
    4409              :     }
    4410              : 
    4411         3749 :   tree cost_name = NULL_TREE;
    4412         3749 :   profile_probability prob2 = profile_probability::always ();
    4413         3749 :   if (cond_expr
    4414         3736 :       && EXPR_P (cond_expr)
    4415         2597 :       && (version_niter
    4416         2597 :           || version_align
    4417              :           || version_alias
    4418         2261 :           || version_simd_if_cond))
    4419              :     {
    4420         2597 :       cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4421              :                                                       &cond_expr_stmt_list,
    4422              :                                                       is_gimple_val, NULL_TREE);
    4423              :       /* Split prob () into two so that the overall probability of passing
    4424              :          both the cost-model and versioning checks is the orig prob.  */
    4425         2597 :       prob2 = prob = prob.sqrt ();
    4426              :     }
    4427              : 
    4428         3749 :   if (version_niter)
    4429          376 :     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
    4430              : 
    4431         3749 :   if (cond_expr)
    4432              :     {
    4433         3736 :       gimple_seq tem = NULL;
    4434         3736 :       cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4435              :                                           &tem, is_gimple_condexpr_for_cond,
    4436              :                                           NULL_TREE);
    4437         3736 :       gimple_seq_add_seq (&cond_expr_stmt_list, tem);
    4438              :     }
    4439              : 
    4440         3749 :   if (version_align)
    4441           45 :     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
    4442              :                                        &cond_expr_stmt_list);
    4443              : 
    4444         3749 :   if (version_spec_read)
    4445            0 :     vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
    4446              : 
    4447         3749 :   if (version_alias)
    4448              :     {
    4449         3291 :       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
    4450         3291 :       vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
    4451         3291 :       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    4452              :     }
    4453              : 
    4454         3749 :   if (version_simd_if_cond)
    4455              :     {
    4456           58 :       gcc_assert (dom_info_available_p (CDI_DOMINATORS));
    4457           58 :       if (flag_checking)
    4458           58 :         if (basic_block bb
    4459           58 :             = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
    4460           58 :           gcc_assert (bb != loop->header
    4461              :                       && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
    4462              :                       && (scalar_loop == NULL
    4463              :                           || (bb != scalar_loop->header
    4464              :                               && dominated_by_p (CDI_DOMINATORS,
    4465              :                                                  scalar_loop->header, bb))));
    4466           58 :       tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
    4467           58 :       tree c = fold_build2 (NE_EXPR, boolean_type_node,
    4468              :                             version_simd_if_cond, zero);
    4469           58 :       if (cond_expr)
    4470           58 :         cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4471              :                                  c, cond_expr);
    4472              :       else
    4473            0 :         cond_expr = c;
    4474           58 :       if (dump_enabled_p ())
    4475            5 :         dump_printf_loc (MSG_NOTE, vect_location,
    4476              :                          "created versioning for simd if condition check.\n");
    4477              :     }
    4478              : 
    4479         3749 :   cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4480              :                                       &gimplify_stmt_list,
    4481              :                                       is_gimple_condexpr_for_cond, NULL_TREE);
    4482         3749 :   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
    4483              : 
    4484              :   /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
    4485              :      invariant in.  */
    4486         3749 :   class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
    4487         3749 :   for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
    4488        68004 :        !gsi_end_p (gsi); gsi_next (&gsi))
    4489              :     {
    4490        64255 :       gimple *stmt = gsi_stmt (gsi);
    4491        64255 :       update_stmt (stmt);
    4492        64255 :       ssa_op_iter iter;
    4493        64255 :       use_operand_p use_p;
    4494        64255 :       basic_block def_bb;
    4495       147783 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    4496        83528 :         if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
    4497        83528 :             && flow_bb_inside_loop_p (outermost, def_bb))
    4498         2508 :           outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
    4499              :     }
    4500              : 
    4501              :   /* Search for the outermost loop we can version.  Avoid versioning of
    4502              :      non-perfect nests but allow if-conversion versioned loops inside.  */
    4503         3749 :   class loop *loop_to_version = loop;
    4504         3749 :   if (flow_loop_nested_p (outermost, loop))
    4505              :     {
    4506         1443 :       if (dump_enabled_p ())
    4507          676 :         dump_printf_loc (MSG_NOTE, vect_location,
    4508              :                          "trying to apply versioning to outer loop %d\n",
    4509              :                          outermost->num);
    4510         1443 :       if (outermost->num == 0)
    4511         1364 :         outermost = superloop_at_depth (loop, 1);
    4512              :       /* And avoid applying versioning on non-perfect nests.  */
    4513              :       while (loop_to_version != outermost
    4514          101 :              && (e = single_exit (loop_outer (loop_to_version)))
    4515           82 :              && !(e->flags & EDGE_COMPLEX)
    4516           81 :              && (!loop_outer (loop_to_version)->inner->next
    4517           53 :                  || vect_loop_vectorized_call (loop_to_version))
    4518           33 :              && (!loop_outer (loop_to_version)->inner->next
    4519            5 :                  || !loop_outer (loop_to_version)->inner->next->next)
    4520         1507 :              && can_duplicate_loop_p (loop_outer (loop_to_version)))
    4521           31 :         loop_to_version = loop_outer (loop_to_version);
    4522              :     }
    4523              : 
    4524              :   /* Apply versioning.  If there is already a scalar version created by
    4525              :      if-conversion re-use that.  Note we cannot re-use the copy of
    4526              :      an if-converted outer-loop when vectorizing the inner loop only.  */
    4527         3749 :   gcond *cond;
    4528         3749 :   if ((!loop_to_version->inner || loop == loop_to_version)
    4529         3722 :       && loop_vectorized_call)
    4530              :     {
    4531           88 :       gcc_assert (scalar_loop);
    4532           88 :       condition_bb = gimple_bb (loop_vectorized_call);
    4533          176 :       cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
    4534           88 :       gimple_cond_set_condition_from_tree (cond, cond_expr);
    4535           88 :       update_stmt (cond);
    4536              : 
    4537           88 :       if (cond_expr_stmt_list)
    4538              :         {
    4539           88 :           cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
    4540           88 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4541              :                                  GSI_SAME_STMT);
    4542              :         }
    4543              : 
    4544              :       /* if-conversion uses profile_probability::always () for both paths,
    4545              :          reset the paths probabilities appropriately.  */
    4546           88 :       edge te, fe;
    4547           88 :       extract_true_false_edges_from_block (condition_bb, &te, &fe);
    4548           88 :       te->probability = prob;
    4549           88 :       fe->probability = prob.invert ();
    4550              :       /* We can scale loops counts immediately but have to postpone
    4551              :          scaling the scalar loop because we re-use it during peeling.
    4552              : 
    4553              :          Ifcvt duplicates loop preheader, loop body and produces an basic
    4554              :          block after loop exit.  We need to scale all that.  */
    4555           88 :       basic_block preheader = loop_preheader_edge (loop_to_version)->src;
    4556           88 :       preheader->count = preheader->count.apply_probability (prob * prob2);
    4557           88 :       scale_loop_frequencies (loop_to_version, prob * prob2);
    4558              :       /* When the loop has multiple exits then we can only version itself.
    4559              :         This is denoted by loop_to_version == loop.  In this case we can
    4560              :         do the versioning by selecting the exit edge the vectorizer is
    4561              :         currently using.  */
    4562           88 :       edge exit_edge;
    4563           88 :       if (loop_to_version == loop)
    4564           88 :        exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4565              :       else
    4566            0 :        exit_edge = single_exit (loop_to_version);
    4567           88 :       exit_edge->dest->count = preheader->count;
    4568           88 :       LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
    4569              : 
    4570           88 :       nloop = scalar_loop;
    4571           88 :       if (dump_enabled_p ())
    4572           90 :         dump_printf_loc (MSG_NOTE, vect_location,
    4573              :                          "reusing %sloop version created by if conversion\n",
    4574              :                          loop_to_version != loop ? "outer " : "");
    4575           88 :     }
    4576              :   else
    4577              :     {
    4578         3661 :       if (loop_to_version != loop
    4579         3661 :           && dump_enabled_p ())
    4580           14 :         dump_printf_loc (MSG_NOTE, vect_location,
    4581              :                          "applying loop versioning to outer loop %d\n",
    4582              :                          loop_to_version->num);
    4583              : 
    4584         3661 :       unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
    4585              : 
    4586         3661 :       initialize_original_copy_tables ();
    4587         7322 :       nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
    4588         3661 :                             prob * prob2, (prob * prob2).invert (),
    4589         3661 :                             prob * prob2, (prob * prob2).invert (),
    4590              :                             true);
    4591              : 
    4592              :       /* If the PHI nodes in the loop header were reallocated, we need to fix up
    4593              :          our internally stashed copies of those.  */
    4594         3661 :       if (loop_to_version == loop)
    4595         3634 :         for (auto gsi = gsi_start_phis (loop->header);
    4596        14499 :              !gsi_end_p (gsi); gsi_next (&gsi))
    4597        10865 :           loop_vinfo->resync_stmt_addr (gsi.phi ());
    4598              : 
    4599              :       /* We will later insert second conditional so overall outcome of
    4600              :          both is prob * prob2.  */
    4601         3661 :       edge true_e, false_e;
    4602         3661 :       extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
    4603         3661 :       true_e->probability = prob;
    4604         3661 :       false_e->probability = prob.invert ();
    4605         3661 :       gcc_assert (nloop);
    4606         3661 :       nloop = get_loop_copy (loop);
    4607              : 
    4608              :       /* Assign hierarchical discriminators to distinguish loop versions.
    4609              :          Only assign to the scalar version here; the vectorized version will
    4610              :          get discriminators later during transformation/peeling.
    4611              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    4612         3661 :       gimple *nloop_last = last_nondebug_stmt (nloop->header);
    4613         3661 :       location_t nloop_loc
    4614         3661 :         = nloop_last ? gimple_location (nloop_last) : UNKNOWN_LOCATION;
    4615         3661 :       if (nloop_loc != UNKNOWN_LOCATION)
    4616              :         {
    4617         3195 :           unsigned int nloop_copyid = allocate_copyid_base (nloop_loc, 1);
    4618         3195 :           assign_discriminators_to_loop (nloop, 0, nloop_copyid);
    4619              :         }
    4620              :       /* For cycle vectorization with SLP we rely on the PHI arguments
    4621              :          appearing in the same order as the SLP node operands which for the
    4622              :          loop PHI nodes means the preheader edge dest index needs to remain
    4623              :          the same for the analyzed loop which also becomes the vectorized one.
    4624              :          Make it so in case the state after versioning differs by redirecting
    4625              :          the first edge into the header to the same destination which moves
    4626              :          it last.  */
    4627         3661 :       if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
    4628              :         {
    4629          292 :           edge e = EDGE_PRED (loop->header, 0);
    4630          292 :           ssa_redirect_edge (e, e->dest);
    4631          292 :           flush_pending_stmts (e);
    4632              :         }
    4633         3661 :       gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
    4634              : 
    4635              :       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
    4636              :          reap those otherwise;  they also refer to the original
    4637              :          loops.  */
    4638              :       class loop *l = loop;
    4639         3666 :       while (gimple *call = vect_loop_vectorized_call (l))
    4640              :         {
    4641            5 :           call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
    4642            5 :           fold_loop_internal_call (call, boolean_false_node);
    4643            5 :           l = loop_outer (l);
    4644            5 :         }
    4645         3661 :       free_original_copy_tables ();
    4646              : 
    4647         3661 :       if (cond_expr_stmt_list)
    4648              :         {
    4649         3580 :           cond_exp_gsi = gsi_last_bb (condition_bb);
    4650         3580 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4651              :                                  GSI_SAME_STMT);
    4652              :         }
    4653              : 
    4654              :       /* Loop versioning violates an assumption we try to maintain during
    4655              :          vectorization - that the loop exit block has a single predecessor.
    4656              :          After versioning, the exit block of both loop versions is the same
    4657              :          basic block (i.e. it has two predecessors). Just in order to simplify
    4658              :          following transformations in the vectorizer, we fix this situation
    4659              :          here by adding a new (empty) block on the exit-edge of the loop,
    4660              :          with the proper loop-exit phis to maintain loop-closed-form.
    4661              :          If loop versioning wasn't done from loop, but scalar_loop instead,
    4662              :          merge_bb will have already just a single successor.  */
    4663              : 
    4664              :       /* When the loop has multiple exits then we can only version itself.
    4665              :          This is denoted by loop_to_version == loop.  In this case we can
    4666              :          do the versioning by selecting the exit edge the vectorizer is
    4667              :          currently using.  */
    4668         3661 :       edge exit_edge;
    4669         3661 :       if (loop_to_version == loop)
    4670         3634 :         exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4671              :       else
    4672           27 :         exit_edge = single_exit (loop_to_version);
    4673              : 
    4674         3661 :       gcc_assert (exit_edge);
    4675         3661 :       merge_bb = exit_edge->dest;
    4676         3661 :       if (EDGE_COUNT (merge_bb->preds) >= 2)
    4677              :         {
    4678         3661 :           gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
    4679         3661 :           new_exit_bb = split_edge (exit_edge);
    4680         3661 :           new_exit_e = exit_edge;
    4681         3661 :           e = EDGE_SUCC (new_exit_bb, 0);
    4682              : 
    4683         7574 :           for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
    4684         3913 :                gsi_next (&gsi))
    4685              :             {
    4686         3913 :               tree new_res;
    4687         3913 :               orig_phi = gsi.phi ();
    4688         3913 :               new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    4689         3913 :               new_phi = create_phi_node (new_res, new_exit_bb);
    4690         3913 :               arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
    4691         3913 :               add_phi_arg (new_phi, arg, new_exit_e,
    4692              :                            gimple_phi_arg_location_from_edge (orig_phi, e));
    4693         3913 :               adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    4694              :             }
    4695              :         }
    4696              : 
    4697         3661 :       update_ssa (TODO_update_ssa_no_phi);
    4698              :     }
    4699              : 
    4700              :   /* Split the cost model check off to a separate BB.  Costing assumes
    4701              :      this is the only thing we perform when we enter the scalar loop
    4702              :      from a failed cost decision.  */
    4703         3749 :   if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
    4704              :     {
    4705         2597 :       gimple *def = SSA_NAME_DEF_STMT (cost_name);
    4706         2597 :       gcc_assert (gimple_bb (def) == condition_bb);
    4707              :       /* All uses of the cost check are 'true' after the check we
    4708              :          are going to insert.  */
    4709         2597 :       replace_uses_by (cost_name, boolean_true_node);
    4710              :       /* And we're going to build the new single use of it.  */
    4711         2597 :       gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
    4712              :                                        NULL_TREE, NULL_TREE);
    4713         2597 :       edge e = split_block (gimple_bb (def), def);
    4714         2597 :       gimple_stmt_iterator gsi = gsi_for_stmt (def);
    4715         2597 :       gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
    4716         2597 :       edge true_e, false_e;
    4717         2597 :       extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
    4718         2597 :       e->flags &= ~EDGE_FALLTHRU;
    4719         2597 :       e->flags |= EDGE_TRUE_VALUE;
    4720         2597 :       edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
    4721         2597 :       e->probability = prob2;
    4722         2597 :       e2->probability = prob2.invert ();
    4723         2597 :       e->dest->count = e->count ();
    4724         2597 :       set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
    4725         2597 :       auto_vec<basic_block, 3> adj;
    4726         2597 :       for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
    4727         7695 :            son;
    4728         5098 :            son = next_dom_son (CDI_DOMINATORS, son))
    4729         7599 :         if (EDGE_COUNT (son->preds) > 1)
    4730         2501 :           adj.safe_push (son);
    4731        10292 :       for (auto son : adj)
    4732         2501 :         set_immediate_dominator (CDI_DOMINATORS, son, e->src);
    4733              :       //debug_bb (condition_bb);
    4734              :       //debug_bb (e->src);
    4735         2597 :     }
    4736              : 
    4737         3749 :   if (version_niter)
    4738              :     {
    4739              :       /* The versioned loop could be infinite, we need to clear existing
    4740              :          niter information which is copied from the original loop.  */
    4741          376 :       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
    4742          376 :       vect_free_loop_info_assumptions (nloop);
    4743              :     }
    4744              : 
    4745         3749 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    4746         3749 :       && dump_enabled_p ())
    4747              :     {
    4748          818 :       if (version_alias)
    4749          744 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4750              :                          vect_location,
    4751              :                          "loop versioned for vectorization because of "
    4752              :                          "possible aliasing\n");
    4753          818 :       if (version_align)
    4754           30 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4755              :                          vect_location,
    4756              :                          "loop versioned for vectorization to enhance "
    4757              :                          "alignment\n");
    4758              : 
    4759              :     }
    4760              : 
    4761         3749 :   return nloop;
    4762              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.