LCOV - code coverage report
Current view: top level - gcc - tree-vect-loop-manip.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 81.8 % 1963 1605
Test Date: 2026-05-30 15:37:04 Functions: 85.1 % 47 40
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Vectorizer Specific Loop Manipulations
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              :    and Ira Rosen <irar@il.ibm.com>
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "cfghooks.h"
      29              : #include "tree-pass.h"
      30              : #include "ssa.h"
      31              : #include "fold-const.h"
      32              : #include "cfganal.h"
      33              : #include "gimplify.h"
      34              : #include "gimple-iterator.h"
      35              : #include "gimplify-me.h"
      36              : #include "tree-cfg.h"
      37              : #include "tree-ssa-loop-manip.h"
      38              : #include "tree-into-ssa.h"
      39              : #include "tree-ssa.h"
      40              : #include "cfgloop.h"
      41              : #include "tree-scalar-evolution.h"
      42              : #include "tree-vectorizer.h"
      43              : #include "tree-ssa-loop-ivopts.h"
      44              : #include "gimple-fold.h"
      45              : #include "tree-ssa-loop-niter.h"
      46              : #include "internal-fn.h"
      47              : #include "stor-layout.h"
      48              : #include "optabs-query.h"
      49              : #include "vec-perm-indices.h"
      50              : #include "insn-config.h"
      51              : #include "rtl.h"
      52              : #include "recog.h"
      53              : #include "langhooks.h"
      54              : #include "tree-vector-builder.h"
      55              : #include "optabs-tree.h"
      56              : #include "hierarchical_discriminator.h"
      57              : 
      58              : 
      59              : /*************************************************************************
      60              :   Simple Loop Peeling Utilities
      61              : 
      62              :   Utilities to support loop peeling for vectorization purposes.
      63              :  *************************************************************************/
      64              : 
      65              : 
      66              : /* Renames the use *OP_P.  */
      67              : 
      68              : static void
      69       898323 : rename_use_op (use_operand_p op_p)
      70              : {
      71       898323 :   tree new_name;
      72              : 
      73       898323 :   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
      74              :     return;
      75              : 
      76       894908 :   new_name = get_current_def (USE_FROM_PTR (op_p));
      77              : 
      78              :   /* Something defined outside of the loop.  */
      79       894908 :   if (!new_name)
      80              :     return;
      81              : 
      82              :   /* An ordinary ssa name defined in the loop.  */
      83              : 
      84       772526 :   SET_USE (op_p, new_name);
      85              : }
      86              : 
      87              : 
      88              : /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
      89              :    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
      90              :    true.  */
      91              : 
      92              : static void
      93       110260 : rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
      94              : {
      95       110260 :   gimple *stmt;
      96       110260 :   use_operand_p use_p;
      97       110260 :   ssa_op_iter iter;
      98       110260 :   edge e;
      99       110260 :   edge_iterator ei;
     100       110260 :   class loop *loop = bb->loop_father;
     101       110260 :   class loop *outer_loop = NULL;
     102              : 
     103       110260 :   if (rename_from_outer_loop)
     104              :     {
     105          935 :       gcc_assert (loop);
     106          935 :       outer_loop = loop_outer (loop);
     107              :     }
     108              : 
     109       731344 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
     110       510824 :        gsi_next (&gsi))
     111              :     {
     112       510824 :       stmt = gsi_stmt (gsi);
     113      1257378 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
     114       746554 :         rename_use_op (use_p);
     115              :     }
     116              : 
     117       225603 :   FOR_EACH_EDGE (e, ei, bb->preds)
     118              :     {
     119       115343 :       if (!flow_bb_inside_loop_p (loop, e->src))
     120              :         {
     121        34488 :           if (!rename_from_outer_loop)
     122        34202 :             continue;
     123          286 :           if (e->src != outer_loop->header)
     124              :             {
     125          177 :               if (outer_loop->inner->next)
     126              :                 {
     127              :                   /* If outer_loop has 2 inner loops, allow there to
     128              :                      be an extra basic block which decides which of the
     129              :                      two loops to use using LOOP_VECTORIZED.  */
     130          174 :                   if (!single_pred_p (e->src)
     131           42 :                       || single_pred (e->src) != outer_loop->header)
     132          132 :                     continue;
     133              :                 }
     134              :             }
     135              :         }
     136       184654 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
     137       103645 :            gsi_next (&gsi))
     138       103645 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
     139              :     }
     140       110260 : }
     141              : 
     142              : 
     143              : struct adjust_info
     144              : {
     145              :   tree from, to;
     146              :   basic_block bb;
     147              : };
     148              : 
     149              : /* A stack of values to be adjusted in debug stmts.  We have to
     150              :    process them LIFO, so that the closest substitution applies.  If we
     151              :    processed them FIFO, without the stack, we might substitute uses
     152              :    with a PHI DEF that would soon become non-dominant, and when we got
     153              :    to the suitable one, it wouldn't have anything to substitute any
     154              :    more.  */
     155              : static vec<adjust_info, va_heap> adjust_vec;
     156              : 
     157              : /* Adjust any debug stmts that referenced AI->from values to use the
     158              :    loop-closed AI->to, if the references are dominated by AI->bb and
     159              :    not by the definition of AI->from.  */
     160              : 
     161              : static void
     162        77934 : adjust_debug_stmts_now (adjust_info *ai)
     163              : {
     164        77934 :   basic_block bbphi = ai->bb;
     165        77934 :   tree orig_def = ai->from;
     166        77934 :   tree new_def = ai->to;
     167        77934 :   imm_use_iterator imm_iter;
     168        77934 :   gimple *stmt;
     169        77934 :   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
     170              : 
     171        77934 :   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
     172              : 
     173              :   /* Adjust any debug stmts that held onto non-loop-closed
     174              :      references.  */
     175       389083 :   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
     176              :     {
     177       233215 :       use_operand_p use_p;
     178       233215 :       basic_block bbuse;
     179              : 
     180       233215 :       if (!is_gimple_debug (stmt))
     181       175936 :         continue;
     182              : 
     183        57279 :       gcc_assert (gimple_debug_bind_p (stmt));
     184              : 
     185        57279 :       bbuse = gimple_bb (stmt);
     186              : 
     187        57279 :       if ((bbuse == bbphi
     188        57279 :            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
     189        59111 :           && !(bbuse == bbdef
     190          916 :                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
     191              :         {
     192            0 :           if (new_def)
     193            0 :             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
     194            0 :               SET_USE (use_p, new_def);
     195              :           else
     196              :             {
     197            0 :               gimple_debug_bind_reset_value (stmt);
     198            0 :               update_stmt (stmt);
     199              :             }
     200              :         }
     201        77934 :     }
     202        77934 : }
     203              : 
     204              : /* Adjust debug stmts as scheduled before.  */
     205              : 
     206              : static void
     207        33175 : adjust_vec_debug_stmts (void)
     208              : {
     209        33175 :   if (!MAY_HAVE_DEBUG_BIND_STMTS)
     210              :     return;
     211              : 
     212        12378 :   gcc_assert (adjust_vec.exists ());
     213              : 
     214        90207 :   while (!adjust_vec.is_empty ())
     215              :     {
     216        77829 :       adjust_debug_stmts_now (&adjust_vec.last ());
     217        77829 :       adjust_vec.pop ();
     218              :     }
     219              : }
     220              : 
     221              : /* Adjust any debug stmts that referenced FROM values to use the
     222              :    loop-closed TO, if the references are dominated by BB and not by
     223              :    the definition of FROM.  If adjust_vec is non-NULL, adjustments
     224              :    will be postponed until adjust_vec_debug_stmts is called.  */
     225              : 
     226              : static void
     227       105989 : adjust_debug_stmts (tree from, tree to, basic_block bb)
     228              : {
     229       105989 :   adjust_info ai;
     230              : 
     231       105989 :   if (MAY_HAVE_DEBUG_BIND_STMTS
     232       105989 :       && TREE_CODE (from) == SSA_NAME
     233        96626 :       && ! SSA_NAME_IS_DEFAULT_DEF (from)
     234       201258 :       && ! virtual_operand_p (from))
     235              :     {
     236        77934 :       ai.from = from;
     237        77934 :       ai.to = to;
     238        77934 :       ai.bb = bb;
     239              : 
     240        77934 :       if (adjust_vec.exists ())
     241        77829 :         adjust_vec.safe_push (ai);
     242              :       else
     243          105 :         adjust_debug_stmts_now (&ai);
     244              :     }
     245       105989 : }
     246              : 
     247              : /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
     248              :    to adjust any debug stmts that referenced the old phi arg,
     249              :    presumably non-loop-closed references left over from other
     250              :    transformations.  */
     251              : 
     252              : static void
     253       214647 : adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
     254              : {
     255       214647 :   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
     256              : 
     257       214647 :   gcc_assert (TREE_CODE (orig_def) != SSA_NAME
     258              :               || orig_def != new_def);
     259              : 
     260       214647 :   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
     261              : 
     262       214647 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
     263        83079 :     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
     264              :                         gimple_bb (update_phi));
     265       214647 : }
     266              : 
     267              : /* Define one loop rgroup control CTRL from loop LOOP.  INIT_CTRL is the value
     268              :    that the control should have during the first iteration and NEXT_CTRL is the
     269              :    value that it should have on subsequent iterations.  */
     270              : 
     271              : static void
     272           86 : vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
     273              :                        tree next_ctrl)
     274              : {
     275           86 :   gphi *phi = create_phi_node (ctrl, loop->header);
     276           86 :   add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
     277           86 :   add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
     278           86 : }
     279              : 
     280              : /* Add SEQ to the end of LOOP's preheader block.  */
     281              : 
     282              : static void
     283           18 : add_preheader_seq (class loop *loop, gimple_seq seq)
     284              : {
     285           18 :   if (seq)
     286              :     {
     287           15 :       edge pe = loop_preheader_edge (loop);
     288           15 :       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
     289           15 :       gcc_assert (!new_bb);
     290              :     }
     291           18 : }
     292              : 
     293              : /* Add SEQ to the beginning of LOOP's header block.  */
     294              : 
     295              : static void
     296            0 : add_header_seq (class loop *loop, gimple_seq seq)
     297              : {
     298            0 :   if (seq)
     299              :     {
     300            0 :       gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
     301            0 :       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
     302              :     }
     303            0 : }
     304              : 
     305              : /* Return true if the target can interleave elements of two vectors.
     306              :    OFFSET is 0 if the first half of the vectors should be interleaved
     307              :    or 1 if the second half should.  When returning true, store the
     308              :    associated permutation in INDICES.  */
     309              : 
     310              : static bool
     311            0 : interleave_supported_p (vec_perm_indices *indices, tree vectype,
     312              :                         unsigned int offset)
     313              : {
     314            0 :   poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
     315            0 :   poly_uint64 base = exact_div (nelts, 2) * offset;
     316            0 :   vec_perm_builder sel (nelts, 2, 3);
     317            0 :   for (unsigned int i = 0; i < 3; ++i)
     318              :     {
     319            0 :       sel.quick_push (base + i);
     320            0 :       sel.quick_push (base + i + nelts);
     321              :     }
     322            0 :   indices->new_vector (sel, 2, nelts);
     323            0 :   return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
     324            0 :                                *indices);
     325            0 : }
     326              : 
     327              : /* Try to use permutes to define the masks in DEST_RGM using the masks
     328              :    in SRC_RGM, given that the former has twice as many masks as the
     329              :    latter.  Return true on success, adding any new statements to SEQ.  */
     330              : 
     331              : static bool
     332            0 : vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
     333              :                                rgroup_controls *src_rgm)
     334              : {
     335            0 :   tree src_masktype = src_rgm->type;
     336            0 :   tree dest_masktype = dest_rgm->type;
     337            0 :   machine_mode src_mode = TYPE_MODE (src_masktype);
     338            0 :   insn_code icode1, icode2;
     339            0 :   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
     340            0 :       && (icode1 = optab_handler (vec_unpacku_hi_optab,
     341              :                                   src_mode)) != CODE_FOR_nothing
     342            0 :       && (icode2 = optab_handler (vec_unpacku_lo_optab,
     343              :                                   src_mode)) != CODE_FOR_nothing)
     344              :     {
     345              :       /* Unpacking the source masks gives at least as many mask bits as
     346              :          we need.  We can then VIEW_CONVERT any excess bits away.  */
     347            0 :       machine_mode dest_mode = insn_data[icode1].operand[0].mode;
     348            0 :       gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
     349            0 :       tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
     350            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     351              :         {
     352            0 :           tree src = src_rgm->controls[i / 2];
     353            0 :           tree dest = dest_rgm->controls[i];
     354            0 :           tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
     355            0 :                             ? VEC_UNPACK_HI_EXPR
     356              :                             : VEC_UNPACK_LO_EXPR);
     357            0 :           gassign *stmt;
     358            0 :           if (dest_masktype == unpack_masktype)
     359            0 :             stmt = gimple_build_assign (dest, code, src);
     360              :           else
     361              :             {
     362            0 :               tree temp = make_ssa_name (unpack_masktype);
     363            0 :               stmt = gimple_build_assign (temp, code, src);
     364            0 :               gimple_seq_add_stmt (seq, stmt);
     365            0 :               stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
     366              :                                           build1 (VIEW_CONVERT_EXPR,
     367              :                                                   dest_masktype, temp));
     368              :             }
     369            0 :           gimple_seq_add_stmt (seq, stmt);
     370              :         }
     371              :       return true;
     372              :     }
     373            0 :   vec_perm_indices indices[2];
     374            0 :   if (dest_masktype == src_masktype
     375            0 :       && interleave_supported_p (&indices[0], src_masktype, 0)
     376            0 :       && interleave_supported_p (&indices[1], src_masktype, 1))
     377              :     {
     378              :       /* The destination requires twice as many mask bits as the source, so
     379              :          we can use interleaving permutes to double up the number of bits.  */
     380              :       tree masks[2];
     381            0 :       for (unsigned int i = 0; i < 2; ++i)
     382            0 :         masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
     383            0 :       for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     384              :         {
     385            0 :           tree src = src_rgm->controls[i / 2];
     386            0 :           tree dest = dest_rgm->controls[i];
     387            0 :           gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
     388            0 :                                               src, src, masks[i & 1]);
     389            0 :           gimple_seq_add_stmt (seq, stmt);
     390              :         }
     391            0 :       return true;
     392              :     }
     393              :   return false;
     394            0 : }
     395              : 
     396              : /* Populate DEST_RGM->controls, given that they should add up to STEP.
     397              : 
     398              :      STEP = MIN_EXPR <ivtmp_34, VF>;
     399              : 
     400              :      First length (MIN (X, VF/N)):
     401              :        loop_len_15 = MIN_EXPR <STEP, VF/N>;
     402              : 
     403              :      Second length:
     404              :        tmp = STEP - loop_len_15;
     405              :        loop_len_16 = MIN (tmp, VF/N);
     406              : 
     407              :      Third length:
     408              :        tmp2 = tmp - loop_len_16;
     409              :        loop_len_17 = MIN (tmp2, VF/N);
     410              : 
     411              :      Last length:
     412              :        loop_len_18 = tmp2 - loop_len_17;
     413              : */
     414              : 
     415              : static void
     416            0 : vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
     417              :                                rgroup_controls *dest_rgm, tree step)
     418              : {
     419            0 :   tree ctrl_type = dest_rgm->type;
     420            0 :   poly_uint64 nitems_per_ctrl
     421            0 :     = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
     422            0 :   tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
     423              : 
     424            0 :   for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
     425              :     {
     426            0 :       tree ctrl = dest_rgm->controls[i];
     427            0 :       if (i == 0)
     428              :         {
     429              :           /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
     430            0 :           gassign *assign
     431            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     432            0 :           gimple_seq_add_stmt (seq, assign);
     433              :         }
     434            0 :       else if (i == dest_rgm->controls.length () - 1)
     435              :         {
     436              :           /* Last iteration: Remain capped to the range [0, VF/N].  */
     437            0 :           gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
     438            0 :                                                  dest_rgm->controls[i - 1]);
     439            0 :           gimple_seq_add_stmt (seq, assign);
     440              :         }
     441              :       else
     442              :         {
     443              :           /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N].  */
     444            0 :           step = gimple_build (seq, MINUS_EXPR, iv_type, step,
     445            0 :                                dest_rgm->controls[i - 1]);
     446            0 :           gassign *assign
     447            0 :             = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
     448            0 :           gimple_seq_add_stmt (seq, assign);
     449              :         }
     450              :     }
     451            0 : }
     452              : 
     453              : /* Stores the standard position for induction variable increment in belonging to
     454              :    LOOP_EXIT (just before the exit condition of the given exit to BSI.
     455              :    INSERT_AFTER is set to true if the increment should be inserted after
     456              :    *BSI.  */
     457              : 
     458              : void
     459        62018 : vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi,
     460              :                             bool *insert_after)
     461              : {
     462        62018 :   basic_block bb = loop_exit->src;
     463        62018 :   *bsi = gsi_last_bb (bb);
     464        62018 :   *insert_after = false;
     465        62018 : }
     466              : 
     467              : /* Helper for vect_set_loop_condition_partial_vectors.  Generate definitions
     468              :    for all the rgroup controls in RGC and return a control that is nonzero
     469              :    when the loop needs to iterate.  Add any new preheader statements to
     470              :    PREHEADER_SEQ.  Use LOOP_COND_GSI to insert code before the exit gcond.
     471              : 
     472              :    RGC belongs to loop LOOP.  The loop originally iterated NITERS
     473              :    times and has been vectorized according to LOOP_VINFO.
     474              : 
     475              :    If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
     476              :    starts with NITERS_SKIP dummy iterations of the scalar loop before
     477              :    the real work starts.  The mask elements for these dummy iterations
     478              :    must be 0, to ensure that the extra iterations do not have an effect.
     479              : 
     480              :    It is known that:
     481              : 
     482              :      NITERS * RGC->max_nscalars_per_iter * RGC->factor
     483              : 
     484              :    does not overflow.  However, MIGHT_WRAP_P says whether an induction
     485              :    variable that starts at 0 and has step:
     486              : 
     487              :      VF * RGC->max_nscalars_per_iter * RGC->factor
     488              : 
     489              :    might overflow before hitting a value above:
     490              : 
     491              :      (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
     492              : 
     493              :    This means that we cannot guarantee that such an induction variable
     494              :    would ever hit a value that produces a set of all-false masks or zero
     495              :    lengths for RGC.
     496              : 
     497              :    Note: the cost of the code generated by this function is modeled
     498              :    by vect_estimate_min_profitable_iters, so changes here may need
     499              :    corresponding changes there.  */
     500              : 
     501              : static tree
     502            0 : vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
     503              :                                  gimple_seq *preheader_seq,
     504              :                                  gimple_seq *header_seq,
     505              :                                  gimple_stmt_iterator loop_cond_gsi,
     506              :                                  rgroup_controls *rgc, tree niters,
     507              :                                  tree niters_skip, bool might_wrap_p,
     508              :                                  tree *iv_step, tree *compare_step)
     509              : {
     510            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
     511            0 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     512            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
     513              : 
     514            0 :   tree ctrl_type = rgc->type;
     515            0 :   unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
     516            0 :   poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
     517            0 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     518            0 :   tree length_limit = NULL_TREE;
     519              :   /* For length, we need length_limit to ensure length in range.  */
     520            0 :   if (!use_masks_p)
     521            0 :     length_limit = build_int_cst (compare_type, nitems_per_ctrl);
     522              : 
     523              :   /* Calculate the maximum number of item values that the rgroup
     524              :      handles in total, the number that it handles for each iteration
     525              :      of the vector loop, and the number that it should skip during the
     526              :      first iteration of the vector loop.  */
     527            0 :   tree nitems_total = niters;
     528            0 :   tree nitems_step = build_int_cst (iv_type, vf);
     529            0 :   tree nitems_skip = niters_skip;
     530            0 :   if (nitems_per_iter != 1)
     531              :     {
     532              :       /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
     533              :          these multiplications don't overflow.  */
     534            0 :       tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
     535            0 :       tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
     536            0 :       nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     537              :                                    nitems_total, compare_factor);
     538            0 :       nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
     539              :                                   nitems_step, iv_factor);
     540            0 :       if (nitems_skip)
     541            0 :         nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
     542              :                                     nitems_skip, compare_factor);
     543              :     }
     544              : 
     545              :   /* Create an induction variable that counts the number of items
     546              :      processed.  */
     547            0 :   tree index_before_incr, index_after_incr;
     548            0 :   gimple_stmt_iterator incr_gsi;
     549            0 :   bool insert_after;
     550            0 :   edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
     551            0 :   vect_iv_increment_position (exit_e, &incr_gsi, &insert_after);
     552            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
     553              :     {
     554              :       /* Create an IV that counts down from niters_total and whose step
     555              :          is the (variable) amount processed in the current iteration:
     556              :            ...
     557              :            _10 = (unsigned long) count_12(D);
     558              :            ...
     559              :            # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
     560              :            _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
     561              :            ...
     562              :            vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
     563              :            ...
     564              :            ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
     565              :            ...
     566              :            if (ivtmp_9 > POLY_INT_CST [4, 4])
     567              :              goto <bb 4>; [83.33%]
     568              :            else
     569              :              goto <bb 5>; [16.67%]
     570              :       */
     571            0 :       nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
     572            0 :       tree step = rgc->controls.length () == 1 ? rgc->controls[0]
     573            0 :                                                : make_ssa_name (iv_type);
     574              :       /* Create decrement IV.  */
     575            0 :       if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
     576              :         {
     577            0 :           create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
     578              :                      insert_after, &index_before_incr, &index_after_incr);
     579            0 :           tree vectype = build_zero_cst (rgc->type);
     580            0 :           tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
     581              :                                    index_before_incr, nitems_step,
     582              :                                    vectype);
     583            0 :           gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
     584              :         }
     585              :       else
     586              :         {
     587            0 :           create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
     588              :                      &incr_gsi, insert_after, &index_before_incr,
     589              :                      &index_after_incr);
     590            0 :           gimple_seq_add_stmt (header_seq,
     591            0 :                                gimple_build_assign (step, MIN_EXPR,
     592              :                                                     index_before_incr,
     593              :                                                     nitems_step));
     594              :         }
     595            0 :       *iv_step = step;
     596            0 :       *compare_step = nitems_step;
     597            0 :       return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
     598            0 :                                                        : index_before_incr;
     599              :     }
     600              : 
     601              :   /* Create increment IV.  */
     602            0 :   create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
     603              :              loop, &incr_gsi, insert_after, &index_before_incr,
     604              :              &index_after_incr);
     605              : 
     606            0 :   tree zero_index = build_int_cst (compare_type, 0);
     607            0 :   tree test_index, test_limit, first_limit;
     608            0 :   gimple_stmt_iterator *test_gsi;
     609            0 :   if (might_wrap_p)
     610              :     {
     611              :       /* In principle the loop should stop iterating once the incremented
     612              :          IV reaches a value greater than or equal to:
     613              : 
     614              :            NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
     615              : 
     616              :          However, there's no guarantee that this addition doesn't overflow
     617              :          the comparison type, or that the IV hits a value above it before
     618              :          wrapping around.  We therefore adjust the limit down by one
     619              :          IV step:
     620              : 
     621              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     622              :            -[infinite-prec] NITEMS_STEP
     623              : 
     624              :          and compare the IV against this limit _before_ incrementing it.
     625              :          Since the comparison type is unsigned, we actually want the
     626              :          subtraction to saturate at zero:
     627              : 
     628              :            (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
     629              :            -[sat] NITEMS_STEP
     630              : 
     631              :          And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
     632              : 
     633              :            NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
     634              : 
     635              :          where the rightmost subtraction can be done directly in
     636              :          COMPARE_TYPE.  */
     637            0 :       test_index = index_before_incr;
     638            0 :       tree adjust = gimple_convert (preheader_seq, compare_type,
     639              :                                     nitems_step);
     640            0 :       if (nitems_skip)
     641            0 :         adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     642              :                                adjust, nitems_skip);
     643            0 :       test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
     644              :                                  nitems_total, adjust);
     645            0 :       test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
     646              :                                  test_limit, adjust);
     647            0 :       test_gsi = &incr_gsi;
     648              : 
     649              :       /* Get a safe limit for the first iteration.  */
     650            0 :       if (nitems_skip)
     651              :         {
     652              :           /* The first vector iteration can handle at most NITEMS_STEP
     653              :              items.  NITEMS_STEP <= CONST_LIMIT, and adding
     654              :              NITEMS_SKIP to that cannot overflow.  */
     655            0 :           tree const_limit = build_int_cst (compare_type,
     656            0 :                                             LOOP_VINFO_VECT_FACTOR (loop_vinfo)
     657            0 :                                             * nitems_per_iter);
     658            0 :           first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
     659              :                                       nitems_total, const_limit);
     660            0 :           first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     661              :                                       first_limit, nitems_skip);
     662              :         }
     663              :       else
     664              :         /* For the first iteration it doesn't matter whether the IV hits
     665              :            a value above NITEMS_TOTAL.  That only matters for the latch
     666              :            condition.  */
     667              :         first_limit = nitems_total;
     668              :     }
     669              :   else
     670              :     {
     671              :       /* Test the incremented IV, which will always hit a value above
     672              :          the bound before wrapping.  */
     673            0 :       test_index = index_after_incr;
     674            0 :       test_limit = nitems_total;
     675            0 :       if (nitems_skip)
     676            0 :         test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
     677              :                                    test_limit, nitems_skip);
     678              :       test_gsi = &loop_cond_gsi;
     679              : 
     680              :       first_limit = test_limit;
     681              :     }
     682              : 
     683              :   /* Convert the IV value to the comparison type (either a no-op or
     684              :      a demotion).  */
     685            0 :   gimple_seq test_seq = NULL;
     686            0 :   test_index = gimple_convert (&test_seq, compare_type, test_index);
     687            0 :   gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
     688              : 
     689              :   /* Provide a definition of each control in the group.  */
     690            0 :   tree next_ctrl = NULL_TREE;
     691            0 :   tree ctrl;
     692            0 :   unsigned int i;
     693            0 :   FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
     694              :     {
     695              :       /* Previous controls will cover BIAS items.  This control covers the
     696              :          next batch.  */
     697            0 :       poly_uint64 bias = nitems_per_ctrl * i;
     698            0 :       tree bias_tree = build_int_cst (compare_type, bias);
     699              : 
     700              :       /* See whether the first iteration of the vector loop is known
     701              :          to have a full control.  */
     702            0 :       poly_uint64 const_limit;
     703            0 :       bool first_iteration_full
     704            0 :         = (poly_int_tree_p (first_limit, &const_limit)
     705            0 :            && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
     706              : 
     707              :       /* Rather than have a new IV that starts at BIAS and goes up to
     708              :          TEST_LIMIT, prefer to use the same 0-based IV for each control
     709              :          and adjust the bound down by BIAS.  */
     710            0 :       tree this_test_limit = test_limit;
     711            0 :       if (i != 0)
     712              :         {
     713            0 :           this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
     714              :                                           compare_type, this_test_limit,
     715              :                                           bias_tree);
     716            0 :           this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
     717              :                                           compare_type, this_test_limit,
     718              :                                           bias_tree);
     719              :         }
     720              : 
     721              :       /* Create the initial control.  First include all items that
     722              :          are within the loop limit.  */
     723            0 :       tree init_ctrl = NULL_TREE;
     724            0 :       if (!first_iteration_full)
     725              :         {
     726            0 :           tree start, end;
     727            0 :           if (first_limit == test_limit)
     728              :             {
     729              :               /* Use a natural test between zero (the initial IV value)
     730              :                  and the loop limit.  The "else" block would be valid too,
     731              :                  but this choice can avoid the need to load BIAS_TREE into
     732              :                  a register.  */
     733              :               start = zero_index;
     734              :               end = this_test_limit;
     735              :             }
     736              :           else
     737              :             {
     738              :               /* FIRST_LIMIT is the maximum number of items handled by the
     739              :                  first iteration of the vector loop.  Test the portion
     740              :                  associated with this control.  */
     741            0 :               start = bias_tree;
     742            0 :               end = first_limit;
     743              :             }
     744              : 
     745            0 :           if (use_masks_p)
     746            0 :             init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
     747              :                                         start, end, "max_mask");
     748              :           else
     749              :             {
     750            0 :               init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
     751            0 :               gimple_seq seq = vect_gen_len (init_ctrl, start,
     752              :                                              end, length_limit);
     753            0 :               gimple_seq_add_seq (preheader_seq, seq);
     754              :             }
     755              :         }
     756              : 
     757              :       /* Now AND out the bits that are within the number of skipped
     758              :          items.  */
     759            0 :       poly_uint64 const_skip;
     760            0 :       if (nitems_skip
     761            0 :           && !(poly_int_tree_p (nitems_skip, &const_skip)
     762            0 :                && known_le (const_skip, bias)))
     763              :         {
     764            0 :           gcc_assert (use_masks_p);
     765            0 :           tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
     766              :                                                     bias_tree, nitems_skip);
     767            0 :           if (init_ctrl)
     768            0 :             init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
     769              :                                       init_ctrl, unskipped_mask);
     770              :           else
     771              :             init_ctrl = unskipped_mask;
     772              :         }
     773              : 
     774            0 :       if (!init_ctrl)
     775              :         {
     776              :           /* First iteration is full.  */
     777            0 :           if (use_masks_p)
     778            0 :             init_ctrl = build_minus_one_cst (ctrl_type);
     779              :           else
     780              :             init_ctrl = length_limit;
     781              :         }
     782              : 
     783              :       /* Get the control value for the next iteration of the loop.  */
     784            0 :       if (use_masks_p)
     785              :         {
     786            0 :           gimple_seq stmts = NULL;
     787            0 :           next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
     788              :                                       this_test_limit, "next_mask");
     789            0 :           gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
     790              :         }
     791              :       else
     792              :         {
     793            0 :           next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
     794            0 :           gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
     795              :                                          length_limit);
     796            0 :           gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
     797              :         }
     798              : 
     799            0 :       vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
     800              :     }
     801              : 
     802            0 :   int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
     803            0 :   if (partial_load_bias != 0)
     804              :     {
     805            0 :       tree adjusted_len = rgc->bias_adjusted_ctrl;
     806            0 :       gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
     807            0 :                                             rgc->controls[0],
     808              :                                             build_int_cst
     809            0 :                                             (TREE_TYPE (rgc->controls[0]),
     810            0 :                                              partial_load_bias));
     811            0 :       gimple_seq_add_stmt (header_seq, minus);
     812              :     }
     813              : 
     814              :   return next_ctrl;
     815              : }
     816              : 
     817              : /* Set up the iteration condition and rgroup controls for LOOP, given
     818              :    that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
     819              :    loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     820              :    the number of iterations of the original scalar loop that should be
     821              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     822              :    for vect_set_loop_condition.
     823              : 
     824              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     825              :    final gcond.  */
     826              : 
     827              : static gcond *
     828            0 : vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
     829              :                                          loop_vec_info loop_vinfo, tree niters,
     830              :                                          tree final_iv, bool niters_maybe_zero,
     831              :                                          gimple_stmt_iterator loop_cond_gsi)
     832              : {
     833            0 :   gimple_seq preheader_seq = NULL;
     834            0 :   gimple_seq header_seq = NULL;
     835              : 
     836            0 :   bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
     837            0 :   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
     838            0 :   unsigned int compare_precision = TYPE_PRECISION (compare_type);
     839            0 :   tree orig_niters = niters;
     840              : 
     841              :   /* Type of the initial value of NITERS.  */
     842            0 :   tree ni_actual_type = TREE_TYPE (niters);
     843            0 :   unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
     844            0 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     845            0 :   if (niters_skip)
     846            0 :     niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
     847              : 
     848              :   /* Convert NITERS to the same size as the compare.  */
     849            0 :   if (compare_precision > ni_actual_precision
     850            0 :       && niters_maybe_zero)
     851              :     {
     852              :       /* We know that there is always at least one iteration, so if the
     853              :          count is zero then it must have wrapped.  Cope with this by
     854              :          subtracting 1 before the conversion and adding 1 to the result.  */
     855            0 :       gcc_assert (TYPE_UNSIGNED (ni_actual_type));
     856            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
     857              :                              niters, build_minus_one_cst (ni_actual_type));
     858            0 :       niters = gimple_convert (&preheader_seq, compare_type, niters);
     859            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
     860              :                              niters, build_one_cst (compare_type));
     861              :     }
     862              :   else
     863            0 :     niters = gimple_convert (&preheader_seq, compare_type, niters);
     864              : 
     865              :   /* Iterate over all the rgroups and fill in their controls.  We could use
     866              :      the first control from any rgroup for the loop condition; here we
     867              :      arbitrarily pick the last.  */
     868            0 :   tree test_ctrl = NULL_TREE;
     869            0 :   tree iv_step = NULL_TREE;
     870            0 :   tree compare_step = NULL_TREE;
     871            0 :   rgroup_controls *rgc;
     872            0 :   rgroup_controls *iv_rgc = nullptr;
     873            0 :   unsigned int i;
     874            0 :   auto_vec<rgroup_controls> *controls = use_masks_p
     875            0 :                                           ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
     876              :                                           : &LOOP_VINFO_LENS (loop_vinfo);
     877            0 :   FOR_EACH_VEC_ELT (*controls, i, rgc)
     878            0 :     if (!rgc->controls.is_empty ())
     879              :       {
     880              :         /* First try using permutes.  This adds a single vector
     881              :            instruction to the loop for each mask, but needs no extra
     882              :            loop invariants or IVs.  */
     883            0 :         unsigned int nmasks = i + 1;
     884            0 :         if (use_masks_p && (nmasks & 1) == 0)
     885              :           {
     886            0 :             rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
     887            0 :             if (!half_rgc->controls.is_empty ()
     888            0 :                 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
     889            0 :               continue;
     890              :           }
     891              : 
     892            0 :         if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     893            0 :             || !iv_rgc
     894            0 :             || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
     895            0 :                 != rgc->max_nscalars_per_iter * rgc->factor))
     896              :           {
     897              :             /* See whether zero-based IV would ever generate all-false masks
     898              :                or zero length before wrapping around.  */
     899            0 :             bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
     900              : 
     901              :             /* Set up all controls for this group.  */
     902            0 :             test_ctrl
     903            0 :               = vect_set_loop_controls_directly (loop, loop_vinfo,
     904              :                                                  &preheader_seq, &header_seq,
     905              :                                                  loop_cond_gsi, rgc, niters,
     906              :                                                  niters_skip, might_wrap_p,
     907              :                                                  &iv_step, &compare_step);
     908              : 
     909            0 :             iv_rgc = rgc;
     910              :           }
     911              : 
     912            0 :         if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     913            0 :             && rgc->controls.length () > 1)
     914              :           {
     915              :             /* vect_set_loop_controls_directly creates an IV whose step
     916              :                is equal to the expected sum of RGC->controls.  Use that
     917              :                information to populate RGC->controls.  */
     918            0 :             tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     919            0 :             gcc_assert (iv_step);
     920            0 :             vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
     921              :           }
     922              :       }
     923              : 
     924              :   /* Emit all accumulated statements.  */
     925            0 :   add_preheader_seq (loop, preheader_seq);
     926            0 :   add_header_seq (loop, header_seq);
     927              : 
     928              :   /* Get a boolean result that tells us whether to iterate.  */
     929            0 :   gcond *cond_stmt;
     930            0 :   if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
     931            0 :       && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
     932              :     {
     933            0 :       gcc_assert (compare_step);
     934            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
     935            0 :       cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
     936              :                                      NULL_TREE);
     937            0 :     }
     938              :   else
     939              :     {
     940            0 :       tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
     941            0 :       tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
     942            0 :       cond_stmt
     943            0 :         = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
     944              :     }
     945            0 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
     946              : 
     947              :   /* The loop iterates (NITERS - 1) / VF + 1 times.
     948              :      Subtract one from this to get the latch count.  */
     949            0 :   tree step = build_int_cst (compare_type,
     950            0 :                              LOOP_VINFO_VECT_FACTOR (loop_vinfo));
     951            0 :   tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
     952              :                                        build_minus_one_cst (compare_type));
     953            0 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
     954              :                                      niters_minus_one, step);
     955              : 
     956            0 :   if (final_iv)
     957              :     {
     958            0 :       gassign *assign;
     959              :       /* If vectorizing an inverted early break loop we have to restart the
     960              :          scalar loop at niters - vf.  This matches what we do in
     961              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
     962            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     963              :         {
     964            0 :           tree ftype = TREE_TYPE (orig_niters);
     965            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
     966            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
     967              :         }
     968              :        else
     969            0 :         assign = gimple_build_assign (final_iv, orig_niters);
     970            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
     971              :     }
     972              : 
     973            0 :   return cond_stmt;
     974              : }
     975              : 
     976              : /* Set up the iteration condition and rgroup controls for LOOP in AVX512
     977              :    style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
     978              :    vectorized loop.  LOOP_VINFO describes the vectorization of LOOP.  NITERS is
     979              :    the number of iterations of the original scalar loop that should be
     980              :    handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are as
     981              :    for vect_set_loop_condition.
     982              : 
     983              :    Insert the branch-back condition before LOOP_COND_GSI and return the
     984              :    final gcond.  */
     985              : 
     986              : static gcond *
     987           18 : vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
     988              :                                          edge exit_edge,
     989              :                                          loop_vec_info loop_vinfo, tree niters,
     990              :                                          tree final_iv,
     991              :                                          bool niters_maybe_zero,
     992              :                                          gimple_stmt_iterator loop_cond_gsi)
     993              : {
     994           18 :   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
     995           18 :   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
     996           18 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
     997           18 :   tree orig_niters = niters;
     998           18 :   gimple_seq preheader_seq = NULL;
     999              : 
    1000              :   /* Create an IV that counts down from niters and whose step
    1001              :      is the number of iterations processed in the current iteration.
    1002              :      Produce the controls with compares like the following.
    1003              : 
    1004              :        # iv_2 = PHI <niters, iv_3>
    1005              :        rem_4 = MIN <iv_2, VF>;
    1006              :        remv_6 = { rem_4, rem_4, rem_4, ... }
    1007              :        mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6;
    1008              :        iv_3 = iv_2 - VF;
    1009              :        if (iv_2 > VF)
    1010              :          continue;
    1011              : 
    1012              :      Where the constant is built with elements at most VF - 1 and
    1013              :      repetitions according to max_nscalars_per_iter which is guarnateed
    1014              :      to be the same within a group.  */
    1015              : 
    1016              :   /* Convert NITERS to the determined IV type.  */
    1017           18 :   if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
    1018           18 :       && niters_maybe_zero)
    1019              :     {
    1020              :       /* We know that there is always at least one iteration, so if the
    1021              :          count is zero then it must have wrapped.  Cope with this by
    1022              :          subtracting 1 before the conversion and adding 1 to the result.  */
    1023            0 :       gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
    1024            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
    1025            0 :                              niters, build_minus_one_cst (TREE_TYPE (niters)));
    1026            0 :       niters = gimple_convert (&preheader_seq, iv_type, niters);
    1027            0 :       niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
    1028              :                              niters, build_one_cst (iv_type));
    1029              :     }
    1030              :   else
    1031           18 :     niters = gimple_convert (&preheader_seq, iv_type, niters);
    1032              : 
    1033              :   /* Bias the initial value of the IV in case we need to skip iterations
    1034              :      at the beginning.  */
    1035           18 :   tree niters_adj = niters;
    1036           18 :   if (niters_skip)
    1037              :     {
    1038            1 :       tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
    1039            1 :       niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
    1040              :                                  iv_type, niters, skip);
    1041              :     }
    1042              : 
    1043              :   /* The iteration step is the vectorization factor.  */
    1044           18 :   tree iv_step = build_int_cst (iv_type, vf);
    1045              : 
    1046              :   /* Create the decrement IV.  */
    1047           18 :   tree index_before_incr, index_after_incr;
    1048           18 :   gimple_stmt_iterator incr_gsi;
    1049           18 :   bool insert_after;
    1050           18 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1051           18 :   create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
    1052              :              &incr_gsi, insert_after, &index_before_incr,
    1053              :              &index_after_incr);
    1054              : 
    1055              :   /* Iterate over all the rgroups and fill in their controls.  */
    1056           80 :   for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
    1057              :     {
    1058           26 :       if (rgc.controls.is_empty ())
    1059            6 :         continue;
    1060              : 
    1061           20 :       tree ctrl_type = rgc.type;
    1062           20 :       poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
    1063              : 
    1064           20 :       tree vectype = rgc.compare_type;
    1065              : 
    1066              :       /* index_after_incr is the IV specifying the remaining iterations in
    1067              :          the next iteration.  */
    1068           20 :       tree rem = index_after_incr;
    1069              :       /* When the data type for the compare to produce the mask is
    1070              :          smaller than the IV type we need to saturate.  Saturate to
    1071              :          the smallest possible value (IV_TYPE) so we only have to
    1072              :          saturate once (CSE will catch redundant ones we add).  */
    1073           20 :       if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
    1074            9 :         rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1075              :                             UNKNOWN_LOCATION,
    1076            9 :                             MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
    1077           20 :       rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1078           20 :                             UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
    1079              : 
    1080              :       /* Build a data vector composed of the remaining iterations.  */
    1081           20 :       rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1082              :                                           UNKNOWN_LOCATION, vectype, rem);
    1083              : 
    1084              :       /* Provide a definition of each vector in the control group.  */
    1085           20 :       tree next_ctrl = NULL_TREE;
    1086           20 :       tree first_rem = NULL_TREE;
    1087           20 :       tree ctrl;
    1088           20 :       unsigned int i;
    1089          238 :       FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
    1090              :         {
    1091              :           /* Previous controls will cover BIAS items.  This control covers the
    1092              :              next batch.  */
    1093           86 :           poly_uint64 bias = nitems_per_ctrl * i;
    1094              : 
    1095              :           /* Build the constant to compare the remaining iters against,
    1096              :              this is sth like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
    1097              :              split into pieces.  */
    1098           86 :           unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
    1099           86 :           tree_vector_builder builder (vectype, n, 1);
    1100         1266 :           for (unsigned i = 0; i < n; ++i)
    1101              :             {
    1102         1180 :               unsigned HOST_WIDE_INT val
    1103         1180 :                 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
    1104         1180 :               gcc_assert (val < vf.to_constant ());
    1105         1180 :               builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
    1106              :             }
    1107           86 :           tree cmp_series = builder.build ();
    1108              : 
    1109              :           /* Create the initial control.  First include all items that
    1110              :              are within the loop limit.  */
    1111           86 :           tree init_ctrl = NULL_TREE;
    1112           86 :           poly_uint64 const_limit;
    1113              :           /* See whether the first iteration of the vector loop is known
    1114              :              to have a full control.  */
    1115           86 :           if (poly_int_tree_p (niters, &const_limit)
    1116           86 :               && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
    1117            1 :             init_ctrl = build_minus_one_cst (ctrl_type);
    1118              :           else
    1119              :             {
    1120              :               /* The remaining work items initially are niters.  Saturate,
    1121              :                  splat and compare.  */
    1122           85 :               if (!first_rem)
    1123              :                 {
    1124           19 :                   first_rem = niters;
    1125           19 :                   if (TYPE_PRECISION (TREE_TYPE (vectype))
    1126           19 :                       < TYPE_PRECISION (iv_type))
    1127            9 :                     first_rem = gimple_build (&preheader_seq,
    1128            9 :                                               MIN_EXPR, TREE_TYPE (first_rem),
    1129              :                                               first_rem, iv_step);
    1130           19 :                   first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
    1131              :                                               first_rem);
    1132           19 :                   first_rem = gimple_build_vector_from_val (&preheader_seq,
    1133              :                                                             vectype, first_rem);
    1134              :                 }
    1135           85 :               init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
    1136              :                                         cmp_series, first_rem);
    1137              :             }
    1138              : 
    1139              :           /* Now AND out the bits that are within the number of skipped
    1140              :              items.  */
    1141           86 :           poly_uint64 const_skip;
    1142           86 :           if (niters_skip
    1143           86 :               && !(poly_int_tree_p (niters_skip, &const_skip)
    1144            1 :                    && known_le (const_skip, bias)))
    1145              :             {
    1146              :               /* For integer mode masks it's cheaper to shift out the bits
    1147              :                  since that avoids loading a constant.  */
    1148            1 :               gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
    1149            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1150            1 :                                         lang_hooks.types.type_for_mode
    1151            1 :                                           (TYPE_MODE (ctrl_type), 1),
    1152              :                                         init_ctrl);
    1153              :               /* ???  But when the shift amount isn't constant this requires
    1154              :                  a round-trip to GRPs.  We could apply the bias to either
    1155              :                  side of the compare instead.  */
    1156            2 :               tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
    1157            1 :                                          TREE_TYPE (niters_skip), niters_skip,
    1158            1 :                                          build_int_cst (TREE_TYPE (niters_skip),
    1159            1 :                                                         bias));
    1160            2 :               shift = gimple_build (&preheader_seq, MULT_EXPR,
    1161            1 :                                     TREE_TYPE (niters_skip), shift,
    1162            1 :                                     build_int_cst (TREE_TYPE (niters_skip),
    1163            1 :                                                    rgc.max_nscalars_per_iter));
    1164            1 :               init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
    1165            1 :                                         TREE_TYPE (init_ctrl),
    1166              :                                         init_ctrl, shift);
    1167            1 :               init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
    1168              :                                         ctrl_type, init_ctrl);
    1169              :             }
    1170              : 
    1171              :           /* Get the control value for the next iteration of the loop.  */
    1172           86 :           next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
    1173              :                                     UNKNOWN_LOCATION,
    1174              :                                     LT_EXPR, ctrl_type, cmp_series, rem);
    1175              : 
    1176           86 :           vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
    1177           86 :         }
    1178              :     }
    1179              : 
    1180              :   /* Emit all accumulated statements.  */
    1181           18 :   add_preheader_seq (loop, preheader_seq);
    1182              : 
    1183              :   /* Adjust the exit test using the decrementing IV.  */
    1184           18 :   tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
    1185              :   /* When we peel for alignment with niter_skip != 0 this can
    1186              :      cause niter + niter_skip to wrap and since we are comparing the
    1187              :      value before the decrement here we get a false early exit.
    1188              :      We can't compare the value after decrement either because that
    1189              :      decrement could wrap as well as we're not doing a saturating
    1190              :      decrement.  To avoid this situation we force a larger
    1191              :      iv_type.  */
    1192           18 :   gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
    1193              :                                         NULL_TREE, NULL_TREE);
    1194           18 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1195              : 
    1196              :   /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
    1197              :      Subtract one from this to get the latch count.  */
    1198           18 :   tree niters_minus_one
    1199           18 :     = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
    1200              :                    build_minus_one_cst (TREE_TYPE (orig_niters)));
    1201           18 :   tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
    1202           18 :   if (niters_skip)
    1203            1 :     niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
    1204              :                                fold_convert (iv_type, niters_skip));
    1205           18 :   loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
    1206              :                                      niters_adj2, iv_step);
    1207              : 
    1208           18 :   if (final_iv)
    1209              :     {
    1210            0 :       gassign *assign;
    1211              :       /* If vectorizing an inverted early break loop we have to restart the
    1212              :          scalar loop at niters - vf.  This matches what we do in
    1213              :          vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
    1214            0 :       if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    1215              :         {
    1216            0 :           tree ftype = TREE_TYPE (orig_niters);
    1217            0 :           tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    1218            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
    1219              :         }
    1220              :        else
    1221            0 :         assign = gimple_build_assign (final_iv, orig_niters);
    1222            0 :       gsi_insert_on_edge_immediate (exit_edge, assign);
    1223              :     }
    1224              : 
    1225           18 :   return cond_stmt;
    1226              : }
    1227              : 
    1228              : 
    1229              : /* Like vect_set_loop_condition, but handle the case in which the vector
    1230              :    loop handles exactly VF scalars per iteration.  */
    1231              : 
    1232              : static gcond *
    1233        61933 : vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
    1234              :                                 class loop *loop, tree niters, tree step,
    1235              :                                 tree final_iv, bool niters_maybe_zero,
    1236              :                                 gimple_stmt_iterator loop_cond_gsi)
    1237              : {
    1238        61933 :   tree indx_before_incr, indx_after_incr;
    1239        61933 :   gcond *cond_stmt;
    1240        61933 :   gcond *orig_cond;
    1241        61933 :   edge pe = loop_preheader_edge (loop);
    1242        61933 :   gimple_stmt_iterator incr_gsi;
    1243        61933 :   bool insert_after;
    1244        61933 :   enum tree_code code;
    1245        61933 :   tree niters_type = TREE_TYPE (niters);
    1246              : 
    1247        61933 :   orig_cond = get_loop_exit_condition (exit_edge);
    1248        61933 :   gcc_assert (orig_cond);
    1249        61933 :   loop_cond_gsi = gsi_for_stmt (orig_cond);
    1250              : 
    1251        61933 :   tree init, limit;
    1252        61933 :   if (!niters_maybe_zero && integer_onep (step))
    1253              :     {
    1254              :       /* In this case we can use a simple 0-based IV:
    1255              : 
    1256              :          A:
    1257              :            x = 0;
    1258              :            do
    1259              :              {
    1260              :                ...
    1261              :                x += 1;
    1262              :              }
    1263              :            while (x < NITERS);  */
    1264        61933 :       code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1265        61933 :       init = build_zero_cst (niters_type);
    1266        61933 :       limit = niters;
    1267              :     }
    1268              :   else
    1269              :     {
    1270              :       /* The following works for all values of NITERS except 0:
    1271              : 
    1272              :          B:
    1273              :            x = 0;
    1274              :            do
    1275              :              {
    1276              :                ...
    1277              :                x += STEP;
    1278              :              }
    1279              :            while (x <= NITERS - STEP);
    1280              : 
    1281              :          so that the loop continues to iterate if x + STEP - 1 < NITERS
    1282              :          but stops if x + STEP - 1 >= NITERS.
    1283              : 
    1284              :          However, if NITERS is zero, x never hits a value above NITERS - STEP
    1285              :          before wrapping around.  There are two obvious ways of dealing with
    1286              :          this:
    1287              : 
    1288              :          - start at STEP - 1 and compare x before incrementing it
    1289              :          - start at -1 and compare x after incrementing it
    1290              : 
    1291              :          The latter is simpler and is what we use.  The loop in this case
    1292              :          looks like:
    1293              : 
    1294              :          C:
    1295              :            x = -1;
    1296              :            do
    1297              :              {
    1298              :                ...
    1299              :                x += STEP;
    1300              :              }
    1301              :            while (x < NITERS - STEP);
    1302              : 
    1303              :          In both cases the loop limit is NITERS - STEP.  */
    1304            0 :       gimple_seq seq = NULL;
    1305            0 :       limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
    1306            0 :       limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
    1307            0 :       if (seq)
    1308              :         {
    1309            0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
    1310            0 :           gcc_assert (!new_bb);
    1311              :         }
    1312            0 :       if (niters_maybe_zero)
    1313              :         {
    1314              :           /* Case C.  */
    1315            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
    1316            0 :           init = build_all_ones_cst (niters_type);
    1317              :         }
    1318              :       else
    1319              :         {
    1320              :           /* Case B.  */
    1321            0 :           code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
    1322            0 :           init = build_zero_cst (niters_type);
    1323              :         }
    1324              :     }
    1325              : 
    1326        61933 :   vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
    1327        61933 :   create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
    1328              :              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
    1329        61933 :   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
    1330              :                                               true, NULL_TREE, true,
    1331              :                                               GSI_SAME_STMT);
    1332        61933 :   limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
    1333              :                                      true, GSI_SAME_STMT);
    1334              : 
    1335        61933 :   cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
    1336              :                                  NULL_TREE);
    1337              : 
    1338        61933 :   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
    1339              : 
    1340              :   /* Record the number of latch iterations.  */
    1341        61933 :   if (limit == niters)
    1342              :     /* Case A: the loop iterates NITERS times.  Subtract one to get the
    1343              :        latch count.  */
    1344        61933 :     loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
    1345              :                                        build_int_cst (niters_type, 1));
    1346              :   else
    1347              :     /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
    1348              :        Subtract one from this to get the latch count.  */
    1349            0 :     loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
    1350              :                                        limit, step);
    1351              : 
    1352        61933 :   if (final_iv)
    1353              :     {
    1354            0 :       gassign *assign;
    1355            0 :       gcc_assert (single_pred_p (exit_edge->dest));
    1356            0 :       tree phi_dest
    1357            0 :         = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
    1358              :       /* Make sure to maintain LC SSA form here and elide the subtraction
    1359              :          if the value is zero.  */
    1360            0 :       gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
    1361            0 :       add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
    1362            0 :       if (!integer_zerop (init))
    1363              :         {
    1364            0 :           assign = gimple_build_assign (final_iv, MINUS_EXPR,
    1365              :                                         phi_dest, init);
    1366            0 :           gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
    1367            0 :           gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
    1368              :         }
    1369              :     }
    1370              : 
    1371        61933 :   return cond_stmt;
    1372              : }
    1373              : 
    1374              : /* If we're using fully-masked loops, make LOOP iterate:
    1375              : 
    1376              :       N == (NITERS - 1) / STEP + 1
    1377              : 
    1378              :    times.  When NITERS is zero, this is equivalent to making the loop
    1379              :    execute (1 << M) / STEP times, where M is the precision of NITERS.
    1380              :    NITERS_MAYBE_ZERO is true if this last case might occur.
    1381              : 
    1382              :    If we're not using fully-masked loops, make LOOP iterate:
    1383              : 
    1384              :       N == (NITERS - STEP) / STEP + 1
    1385              : 
    1386              :    times, where NITERS is known to be outside the range [1, STEP - 1].
    1387              :    This is equivalent to making the loop execute NITERS / STEP times
    1388              :    when NITERS is nonzero and (1 << M) / STEP times otherwise.
    1389              :    NITERS_MAYBE_ZERO again indicates whether this last case might occur.
    1390              : 
    1391              :    If FINAL_IV is nonnull, it is an SSA name that should be set to
    1392              :    N * STEP on exit from the loop.
    1393              : 
    1394              :    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
    1395              : 
    1396              : void
    1397        61951 : vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
    1398              :                          tree niters, tree step, tree final_iv,
    1399              :                          bool niters_maybe_zero)
    1400              : {
    1401        61951 :   gcond *cond_stmt;
    1402        61951 :   gcond *orig_cond = get_loop_exit_condition (loop_e);
    1403        61951 :   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
    1404              : 
    1405        61951 :   if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    1406              :     {
    1407           18 :       if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
    1408           18 :         cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
    1409              :                                                                     loop_vinfo,
    1410              :                                                                     niters, final_iv,
    1411              :                                                                     niters_maybe_zero,
    1412              :                                                                     loop_cond_gsi);
    1413              :       else
    1414            0 :         cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
    1415              :                                                              loop_vinfo,
    1416              :                                                              niters, final_iv,
    1417              :                                                              niters_maybe_zero,
    1418              :                                                              loop_cond_gsi);
    1419              :     }
    1420              :   else
    1421        61933 :     cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
    1422              :                                                 niters,
    1423              :                                                 step, final_iv,
    1424              :                                                 niters_maybe_zero,
    1425              :                                                 loop_cond_gsi);
    1426              : 
    1427              :   /* Remove old loop exit test.  */
    1428        61951 :   stmt_vec_info orig_cond_info;
    1429        61951 :   if (loop_vinfo
    1430        61951 :       && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
    1431        61523 :     loop_vinfo->remove_stmt (orig_cond_info);
    1432              :   else
    1433          428 :     gsi_remove (&loop_cond_gsi, true);
    1434              : 
    1435        61951 :   if (dump_enabled_p ())
    1436        11222 :     dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
    1437              :                      (gimple *) cond_stmt);
    1438        61951 : }
    1439              : 
    1440              : /* Get the virtual operand live on E.  The precondition on this is valid
    1441              :    immediate dominators and an actual virtual definition dominating E.  */
    1442              : /* ???  Costly band-aid.  For the use in question we can populate a
    1443              :    live-on-exit/end-of-BB virtual operand when copying stmts.  */
    1444              : 
    1445              : static tree
    1446           10 : get_live_virtual_operand_on_edge (edge e)
    1447              : {
    1448           10 :   basic_block bb = e->src;
    1449           22 :   do
    1450              :     {
    1451           61 :       for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    1452              :         {
    1453           37 :           gimple *stmt = gsi_stmt (gsi);
    1454           55 :           if (gimple_vdef (stmt))
    1455           10 :             return gimple_vdef (stmt);
    1456           39 :           if (gimple_vuse (stmt))
    1457              :             return gimple_vuse (stmt);
    1458              :         }
    1459            8 :       if (gphi *vphi = get_virtual_phi (bb))
    1460            2 :         return gimple_phi_result (vphi);
    1461            6 :       bb = get_immediate_dominator (CDI_DOMINATORS, bb);
    1462            6 :     }
    1463              :   while (1);
    1464              : }
    1465              : 
    1466              : /* Given LOOP this function generates a new copy of it and puts it
    1467              :    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
    1468              :    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
    1469              :    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
    1470              :    entry or exit of LOOP.  If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
    1471              :    continuation.  This is correct for cases where one loop continues from the
    1472              :    other like in the vectorizer, but not true for uses in e.g. loop distribution
    1473              :    where the contents of the loop body are split but the iteration space of both
    1474              :    copies remains the same.
    1475              : 
    1476              :    If UPDATED_DOMS is not NULL it is update with the list of basic blocks whoms
    1477              :    dominators were updated during the peeling.  When doing early break vectorization
    1478              :    then LOOP_VINFO needs to be provided and is used to keep track of any newly created
    1479              :    memory references that need to be updated should we decide to vectorize.  */
    1480              : 
    1481              : class loop *
    1482        34334 : slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
    1483              :                                         class loop *scalar_loop,
    1484              :                                         edge scalar_exit, edge e, edge *new_e,
    1485              :                                         bool flow_loops,
    1486              :                                         vec<basic_block> *updated_doms,
    1487              :                                         bool uncounted_p, bool create_main_e,
    1488              :                                         bool redirect_exits)
    1489              : {
    1490        34334 :   class loop *new_loop;
    1491        34334 :   basic_block *new_bbs, *bbs, *pbbs;
    1492        34334 :   bool at_exit;
    1493        34334 :   bool was_imm_dom;
    1494        34334 :   basic_block exit_dest;
    1495        34334 :   edge exit, new_exit;
    1496        34334 :   bool duplicate_outer_loop = false;
    1497              : 
    1498        34334 :   exit = loop_exit;
    1499        34334 :   at_exit = (e == exit);
    1500        34334 :   if (!at_exit && e != loop_preheader_edge (loop))
    1501              :     return NULL;
    1502              : 
    1503        34334 :   if (scalar_loop == NULL)
    1504              :     {
    1505        31805 :       scalar_loop = loop;
    1506        31805 :       scalar_exit = loop_exit;
    1507              :     }
    1508         2529 :   else if (scalar_loop == loop)
    1509            0 :     scalar_exit = loop_exit;
    1510              :   else
    1511              :     {
    1512              :       /* Loop has been version, match exits up using the aux index.  */
    1513         7587 :       for (edge exit : get_loop_exit_edges (scalar_loop))
    1514         2529 :         if (exit->aux == loop_exit->aux)
    1515              :           {
    1516         2529 :             scalar_exit = exit;
    1517         2529 :             break;
    1518         2529 :           }
    1519              : 
    1520         2529 :       gcc_assert (scalar_exit);
    1521              :     }
    1522              : 
    1523        34334 :   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1524        34334 :   pbbs = bbs + 1;
    1525        34334 :   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
    1526              :   /* Allow duplication of outer loops.  */
    1527        34334 :   if (scalar_loop->inner)
    1528          132 :     duplicate_outer_loop = true;
    1529              : 
    1530              :   /* Generate new loop structure.  */
    1531        34334 :   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
    1532        34334 :   duplicate_subloops (scalar_loop, new_loop);
    1533              : 
    1534        34334 :   exit_dest = exit->dest;
    1535        34334 :   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
    1536        34334 :                                           exit_dest) == exit->src ?
    1537              :                  true : false);
    1538              : 
    1539              :   /* Also copy the pre-header, this avoids jumping through hoops to
    1540              :      duplicate the loop entry PHI arguments.  Create an empty
    1541              :      pre-header unconditionally for this.  */
    1542        34334 :   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
    1543        34334 :   edge entry_e = single_pred_edge (preheader);
    1544        34334 :   bbs[0] = preheader;
    1545        34334 :   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
    1546              : 
    1547        34334 :   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
    1548              :             &scalar_exit, 1, &new_exit, NULL,
    1549              :             at_exit ? loop->latch : e->src, true);
    1550        34334 :   exit = loop_exit;
    1551        34334 :   basic_block new_preheader = new_bbs[0];
    1552              : 
    1553        34334 :   gcc_assert (new_exit);
    1554              : 
    1555              :   /* Record the new loop exit information.  new_loop doesn't have SCEV data and
    1556              :      so we must initialize the exit information.  */
    1557        34334 :   if (new_e)
    1558        33175 :     *new_e = new_exit;
    1559              : 
    1560              :   /* Before installing PHI arguments make sure that the edges
    1561              :      into them match that of the scalar loop we analyzed.  This
    1562              :      makes sure the SLP tree matches up between the main vectorized
    1563              :      loop and the epilogue vectorized copies.  */
    1564        34334 :   if (single_succ_edge (preheader)->dest_idx
    1565        34334 :       != single_succ_edge (new_bbs[0])->dest_idx)
    1566              :     {
    1567        29335 :       basic_block swap_bb = new_bbs[1];
    1568        29335 :       gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1569        29335 :       std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1570        29335 :       EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1571        29335 :       EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1572              :     }
    1573        34334 :   if (duplicate_outer_loop)
    1574              :     {
    1575          132 :       class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
    1576          132 :       if (loop_preheader_edge (scalar_loop)->dest_idx
    1577          132 :           != loop_preheader_edge (new_inner_loop)->dest_idx)
    1578              :         {
    1579           99 :           basic_block swap_bb = new_inner_loop->header;
    1580           99 :           gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
    1581           99 :           std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
    1582           99 :           EDGE_PRED (swap_bb, 0)->dest_idx = 0;
    1583           99 :           EDGE_PRED (swap_bb, 1)->dest_idx = 1;
    1584              :         }
    1585              :     }
    1586              : 
    1587        34334 :   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
    1588              : 
    1589              :   /* Skip new preheader since it's deleted if copy loop is added at entry.  */
    1590       178928 :   for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    1591       110260 :     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
    1592              : 
    1593              :   /* Rename the exit uses.  */
    1594       104158 :   for (edge exit : get_loop_exit_edges (new_loop))
    1595        35490 :     for (auto gsi = gsi_start_phis (exit->dest);
    1596        83614 :          !gsi_end_p (gsi); gsi_next (&gsi))
    1597              :       {
    1598        48124 :         tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
    1599        48124 :         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
    1600        48124 :         if (MAY_HAVE_DEBUG_BIND_STMTS)
    1601        22910 :           adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
    1602        34334 :       }
    1603              : 
    1604        34334 :   auto loop_exits = get_loop_exit_edges (loop);
    1605        34334 :   bool has_multiple_exits_p = loop_exits.length () > 1;
    1606              : 
    1607              :   /* If REDIRECT_EXITS is false we leave the alternative exits untouched and
    1608              :      treat the duplication as if the loop only had the main exit.  */
    1609        34334 :   bool redirect_multiple_exits_p = redirect_exits && has_multiple_exits_p;
    1610        34334 :   auto_vec<basic_block> doms;
    1611              : 
    1612        34334 :   if (at_exit) /* Add the loop copy at exit.  */
    1613              :     {
    1614        32747 :       if (scalar_loop != loop && new_exit->dest != exit_dest)
    1615              :         {
    1616         2525 :           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
    1617         2525 :           flush_pending_stmts (new_exit);
    1618              :         }
    1619              : 
    1620        32747 :       bool need_virtual_phi = get_virtual_phi (loop->header);
    1621              : 
    1622              :       /* For the main loop exit preserve the LC PHI nodes.  For vectorization
    1623              :          we need them to continue or finalize reductions.  Since we do not
    1624              :          copy the loop exit blocks we have to materialize PHIs at the
    1625              :          new destination before redirecting edges.  */
    1626        32747 :       for (auto gsi_from = gsi_start_phis (loop_exit->dest);
    1627        78080 :            !gsi_end_p (gsi_from); gsi_next (&gsi_from))
    1628              :         {
    1629        45333 :           tree res = gimple_phi_result (*gsi_from);
    1630        45333 :           create_phi_node (copy_ssa_name (res), new_preheader);
    1631              :         }
    1632        32747 :       edge e = redirect_edge_and_branch (loop_exit, new_preheader);
    1633        32747 :       gcc_assert (e == loop_exit);
    1634        32747 :       flush_pending_stmts (loop_exit);
    1635        32747 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
    1636              : 
    1637              :       /* If we ended up choosing an exit leading to a path not using memory
    1638              :          we can end up without a virtual LC PHI.  Create it when it is
    1639              :          needed because of the epilog loop continuation.  */
    1640        32747 :       if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
    1641              :         {
    1642            8 :           tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
    1643            8 :           gphi *vphi = create_phi_node (copy_ssa_name (header_def),
    1644              :                                         new_preheader);
    1645            8 :           add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
    1646              :                        loop_exit, UNKNOWN_LOCATION);
    1647              :         }
    1648              : 
    1649        32747 :       basic_block main_loop_exit_block = new_preheader;
    1650        32747 :       basic_block alt_loop_exit_block = new_preheader;
    1651              :       /* When we redirect the other exits create the CFG
    1652              :          below to funnel everything through the merge block:
    1653              :                    | loop_exit               | alt1   | altN
    1654              :                    v                         v   ...  v
    1655              :             main_loop_exit_block:       alt_loop_exit_block:
    1656              :                    |                      /
    1657              :                    v                     v
    1658              :             new_preheader:
    1659              :          where in the new preheader we need merge PHIs for
    1660              :          the continuation values into the epilogue header.
    1661              :          Do not bother with exit PHIs for the early exits but
    1662              :          their live virtual operand.  We'll fix up things below.  */
    1663        32747 :       if (redirect_multiple_exits_p || uncounted_p)
    1664              :         {
    1665          644 :           edge loop_e = single_succ_edge (new_preheader);
    1666          644 :           new_preheader = split_edge (loop_e);
    1667              : 
    1668          644 :         if (redirect_exits)
    1669              :           {
    1670          638 :             gphi *vphi = NULL;
    1671          638 :             alt_loop_exit_block = new_preheader;
    1672         3312 :             for (auto exit : loop_exits)
    1673         1398 :               if (exit != loop_exit)
    1674              :                 {
    1675          760 :                   tree vphi_def = NULL_TREE;
    1676          760 :                   if (gphi *evphi = get_virtual_phi (exit->dest))
    1677          452 :                     vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
    1678          760 :                   edge res
    1679          760 :                     = redirect_edge_and_branch (exit, alt_loop_exit_block);
    1680          760 :                   gcc_assert (res == exit);
    1681          760 :                   redirect_edge_var_map_clear (exit);
    1682              : 
    1683          760 :                   if (alt_loop_exit_block == new_preheader)
    1684          615 :                     alt_loop_exit_block = split_edge (exit);
    1685          760 :                   if (!need_virtual_phi)
    1686          316 :                     continue;
    1687              : 
    1688              :                   /* When the edge has no virtual LC PHI get at the live
    1689              :                      virtual operand by other means.  */
    1690          444 :                   if (!vphi_def)
    1691            2 :                     vphi_def = get_live_virtual_operand_on_edge (exit);
    1692              : 
    1693          444 :                   if (!vphi)
    1694          410 :                     vphi = create_phi_node (copy_ssa_name (vphi_def),
    1695              :                                           alt_loop_exit_block);
    1696              :                   else
    1697              :                     /* Edge redirection might re-allocate the PHI node
    1698              :                        so we have to rediscover it.  */
    1699           34 :                     vphi = get_virtual_phi (alt_loop_exit_block);
    1700          444 :                   add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
    1701              :                 }
    1702              :           }
    1703              : 
    1704          644 :           set_immediate_dominator (CDI_DOMINATORS, new_preheader,
    1705              :                                    loop->header);
    1706              : 
    1707              :           /* Fix up the profile counts of the new exit blocks.
    1708              :              main_loop_exit_block was created by duplicating the
    1709              :              preheader, so needs its count scaling according to the main
    1710              :              exit edge's probability.  The remaining count from the
    1711              :              preheader goes to the alt_loop_exit_block, since all
    1712              :              alternative exits have been redirected there.  */
    1713          644 :           main_loop_exit_block->count = loop_exit->count ();
    1714          644 :           alt_loop_exit_block->count
    1715          644 :             = preheader->count - main_loop_exit_block->count;
    1716              :         }
    1717              : 
    1718              :       /* Adjust the epilog loop PHI entry values to continue iteration.
    1719              :          This adds remaining necessary LC PHI nodes to the main exit
    1720              :          and creates merge PHIs when we have multiple exits with
    1721              :          their appropriate continuation.  */
    1722        32747 :       if (flow_loops)
    1723              :         {
    1724        32747 :           edge loop_entry = single_succ_edge (new_preheader);
    1725        32747 :           bool peeled_iters = (uncounted_p
    1726        32747 :                                || single_pred (loop->latch) != loop_exit->src);
    1727              : 
    1728              :           /* Record the new SSA names in the cache so that we can skip
    1729              :              materializing them again when we fill in the rest of the LC SSA
    1730              :              variables.  */
    1731        32747 :           hash_map <tree, tree> new_phi_args;
    1732        32747 :           for (auto psi = gsi_start_phis (main_loop_exit_block);
    1733        78088 :                !gsi_end_p (psi); gsi_next (&psi))
    1734              :             {
    1735        45341 :               gphi *phi = *psi;
    1736        45341 :               tree new_arg = gimple_phi_arg_def_from_edge (phi, loop_exit);
    1737        45341 :               if (TREE_CODE (new_arg) != SSA_NAME)
    1738          315 :                 continue;
    1739              : 
    1740              :               /* If the loop doesn't have a virtual def then only possibly keep
    1741              :                  the epilog LC PHI for it and avoid creating new defs.  */
    1742        45116 :               if (virtual_operand_p (new_arg) && !need_virtual_phi)
    1743              :                 {
    1744           90 :                   auto gsi = gsi_for_stmt (phi);
    1745           90 :                   remove_phi_node (&gsi, true);
    1746           90 :                   continue;
    1747           90 :                 }
    1748              : 
    1749              :               /* If we decided not to remove the PHI node we should also not
    1750              :                  rematerialize it later on.  */
    1751        45026 :               new_phi_args.put (new_arg, gimple_phi_result (phi));
    1752              :             }
    1753              : 
    1754              :           /* Create the merge PHI nodes in new_preheader and populate the
    1755              :              arguments for the exits.  */
    1756        32747 :           if (redirect_multiple_exits_p)
    1757              :             {
    1758          615 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1759          615 :                    gsi_to = gsi_start_phis (new_loop->header);
    1760         2236 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1761         1621 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1762              :                 {
    1763         1621 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1764         1621 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1765              : 
    1766              :                   /* When the vector loop is peeled then we need to use the
    1767              :                      value at start of the loop, otherwise the main loop exit
    1768              :                      should use the final iter value.  */
    1769         1621 :                   tree new_arg;
    1770         1621 :                   if (peeled_iters)
    1771           71 :                     new_arg = gimple_phi_result (from_phi);
    1772              :                   else
    1773         1550 :                     new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1774              :                                                      loop_latch_edge (loop));
    1775              : 
    1776              :                   /* Check if we've already created a new phi node during edge
    1777              :                      redirection and re-use it if so.  Otherwise create a
    1778              :                      LC PHI node to feed the merge PHI.  */
    1779         1621 :                   tree *res;
    1780         3242 :                   if (virtual_operand_p (new_arg))
    1781              :                     {
    1782              :                       /* Use the existing virtual LC SSA from exit block.  */
    1783          410 :                       gphi *vphi = get_virtual_phi (main_loop_exit_block);
    1784          410 :                       new_arg = gimple_phi_result (vphi);
    1785              :                     }
    1786         1211 :                   else if ((res = new_phi_args.get (new_arg)))
    1787          105 :                     new_arg = *res;
    1788              :                   else
    1789              :                     {
    1790              :                       /* Create the LC PHI node for the exit.  */
    1791         1106 :                       tree new_def = copy_ssa_name (new_arg);
    1792         1106 :                       gphi *lc_phi
    1793         1106 :                           = create_phi_node (new_def, main_loop_exit_block);
    1794         1106 :                       SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
    1795         1106 :                       new_arg = new_def;
    1796              :                     }
    1797              : 
    1798              :                   /* Create the PHI node in the merge block merging the
    1799              :                      main and early exit values.  */
    1800         1621 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1801         1621 :                   gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
    1802         1621 :                   edge main_e = single_succ_edge (main_loop_exit_block);
    1803         1621 :                   SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg);
    1804              : 
    1805              :                   /* And adjust the epilog entry value.  */
    1806         1621 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1807              :                 }
    1808              :             }
    1809              : 
    1810        32747 :           if (redirect_multiple_exits_p)
    1811              :             {
    1812              :               /* After creating the merge PHIs handle the early exits those
    1813              :                  should use the values at the start of the loop.  */
    1814          615 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1815          615 :                    gsi_to = gsi_start_phis (new_preheader);
    1816         2236 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1817         1621 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1818              :                 {
    1819         1621 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1820         1621 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1821              : 
    1822              :                   /* Now update the virtual PHI nodes with the right value.  */
    1823         1621 :                   tree alt_arg = gimple_phi_result (from_phi);
    1824         3242 :                   if (virtual_operand_p (alt_arg))
    1825              :                     {
    1826          410 :                       gphi *vphi = get_virtual_phi (alt_loop_exit_block);
    1827          410 :                       alt_arg = gimple_phi_result (vphi);
    1828              :                     }
    1829              :                   /* For other live args we didn't create LC PHI nodes.
    1830              :                      Do so here.  */
    1831              :                   else
    1832              :                     {
    1833         1211 :                       tree alt_def = copy_ssa_name (alt_arg);
    1834         1211 :                       gphi *lc_phi
    1835         1211 :                         = create_phi_node (alt_def, alt_loop_exit_block);
    1836         2711 :                       for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
    1837              :                            ++i)
    1838         1500 :                         SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
    1839              :                       alt_arg = alt_def;
    1840              :                     }
    1841              : 
    1842              :                   /* The merge PHIs live in NEW_PREHEADER; their
    1843              :                      alternative argument always comes from the
    1844              :                      successor edge of ALT_LOOP_EXIT_BLOCK.  */
    1845         1621 :                   edge alt_e = single_succ_edge (alt_loop_exit_block);
    1846         1621 :                   SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
    1847              :                 }
    1848              :             }
    1849              : 
    1850              :           /* For the single exit case only create the missing LC PHI nodes
    1851              :              for the continuation of the loop IVs that are not also already
    1852              :              reductions and thus had LC PHI nodes on the exit already.  When
    1853              :              we are not redirecting the alternative exits the layout is:
    1854              : 
    1855              :                    loop_exit ---> new_preheader ---> epilog
    1856              :                    alt_exit ---------------> original dest
    1857              :            */
    1858          615 :           if (!redirect_multiple_exits_p)
    1859              :             {
    1860        32132 :               for (auto gsi_from = gsi_start_phis (loop->header),
    1861        32132 :                    gsi_to = gsi_start_phis (new_loop->header);
    1862       122449 :                    !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1863        90317 :                    gsi_next (&gsi_from), gsi_next (&gsi_to))
    1864              :                 {
    1865        90317 :                   gimple *from_phi = gsi_stmt (gsi_from);
    1866        90317 :                   gimple *to_phi = gsi_stmt (gsi_to);
    1867        90317 :                   tree new_arg;
    1868              : 
    1869              :                   /* Use the value on the exiting path.  When the exit is from
    1870              :                      the latch edge we want the post-iteration value on that
    1871              :                      edge; when the exit is from the loop header (before the
    1872              :                      latch ever executes) we must use the current header value,
    1873              :                      otherwise we pick up a name that was never defined.  */
    1874        90317 :                   if (!has_multiple_exits_p && !uncounted_p)
    1875        90038 :                     new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1876              :                                                      loop_latch_edge (loop));
    1877              :                   else
    1878          279 :                     new_arg = gimple_phi_result (from_phi);
    1879              : 
    1880              :                   /* Re-use the virtual LC PHI we already built when we are not
    1881              :                      redirecting the other exits to avoid creating duplicate
    1882              :                      virtual SSA names.  */
    1883       180634 :                   if (virtual_operand_p (new_arg))
    1884              :                     {
    1885        24454 :                       if (gphi *vphi = get_virtual_phi (main_loop_exit_block))
    1886              :                         {
    1887        24454 :                           adjust_phi_and_debug_stmts (to_phi, loop_entry,
    1888              :                                                       gimple_phi_result (vphi));
    1889        42328 :                           continue;
    1890              :                         }
    1891              :                     }
    1892              : 
    1893              :                   /* Check if we've already created a new phi node during edge
    1894              :                      redirection.  If we have, only propagate the value
    1895              :                      downwards.  */
    1896        65863 :                   if (tree *res = new_phi_args.get (new_arg))
    1897              :                     {
    1898              :                       /* Check if the new dest block already contains a use.  */
    1899        17874 :                       gimple *stmt = SSA_NAME_DEF_STMT (*res);
    1900              : 
    1901              :                       /* If the value already exist, just update the destination
    1902              :                          and if it doesn't we want a new node.  */
    1903        17874 :                       if (gimple_bb (stmt) == main_loop_exit_block)
    1904              :                         {
    1905        17874 :                           adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
    1906        17874 :                           continue;
    1907              :                         }
    1908              :                       else
    1909            0 :                         new_arg = *res;
    1910              :                     }
    1911              : 
    1912        47989 :                   tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
    1913        47989 :                   gphi *lcssa_phi = create_phi_node (new_res, main_loop_exit_block);
    1914        47989 :                   SET_PHI_ARG_DEF (lcssa_phi, loop_exit->dest_idx, new_arg);
    1915        47989 :                   adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
    1916              :                 }
    1917              :             }
    1918        32747 :         }
    1919              : 
    1920        32747 :       if (was_imm_dom || duplicate_outer_loop)
    1921        32473 :         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
    1922              : 
    1923              :       /* And remove the non-necessary forwarder again.  Keep the other
    1924              :          one so we have a proper pre-header for the loop at the exit edge.  */
    1925        32747 :       redirect_edge_pred (single_succ_edge (preheader),
    1926              :                           single_pred (preheader));
    1927        32747 :       delete_basic_block (preheader);
    1928        32747 :       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1929        32747 :                                loop_preheader_edge (scalar_loop)->src);
    1930              : 
    1931              :       /* Finally after wiring the new epilogue we need to update its main exit
    1932              :          to the original function exit we recorded.  Other exits are already
    1933              :          correct.  */
    1934        32747 :       if (has_multiple_exits_p || uncounted_p)
    1935              :         {
    1936          825 :           class loop *update_loop = new_loop;
    1937          825 :           doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
    1938        19201 :           for (unsigned i = 0; i < doms.length (); ++i)
    1939        18376 :             if (flow_bb_inside_loop_p (loop, doms[i]))
    1940         2579 :               doms.unordered_remove (i);
    1941              : 
    1942         4241 :           for (edge e : get_loop_exit_edges (update_loop))
    1943              :             {
    1944         1766 :               edge ex;
    1945         1766 :               edge_iterator ei;
    1946         3573 :               FOR_EACH_EDGE (ex, ei, e->dest->succs)
    1947              :                 {
    1948              :                   /* Find the first non-fallthrough block as fall-throughs can't
    1949              :                      dominate other blocks.  */
    1950         1807 :                   if (single_succ_p (ex->dest))
    1951              :                     {
    1952          951 :                       doms.safe_push (ex->dest);
    1953          951 :                       ex = single_succ_edge (ex->dest);
    1954              :                     }
    1955         1807 :                   doms.safe_push (ex->dest);
    1956              :                 }
    1957         1766 :               doms.safe_push (e->dest);
    1958          825 :             }
    1959              : 
    1960          825 :           iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    1961          825 :           if (updated_doms)
    1962          825 :             updated_doms->safe_splice (doms);
    1963              :         }
    1964              :     }
    1965              :   else /* Add the copy at entry.  */
    1966              :     {
    1967              :       /* Copy the current loop LC PHI nodes between the original loop exit
    1968              :          block and the new loop header.  This allows us to later split the
    1969              :          preheader block and still find the right LC nodes.  */
    1970         1587 :       if (flow_loops)
    1971          428 :         for (auto gsi_from = gsi_start_phis (new_loop->header),
    1972          428 :              gsi_to = gsi_start_phis (loop->header);
    1973         1345 :              !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
    1974          917 :              gsi_next (&gsi_from), gsi_next (&gsi_to))
    1975              :           {
    1976          917 :             gimple *from_phi = gsi_stmt (gsi_from);
    1977          917 :             gimple *to_phi = gsi_stmt (gsi_to);
    1978          917 :             tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
    1979              :                                                   loop_latch_edge (new_loop));
    1980          917 :             adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
    1981              :                                         new_arg);
    1982              :           }
    1983              : 
    1984         1587 :       if (scalar_loop != loop)
    1985              :         {
    1986              :           /* Remove the non-necessary forwarder of scalar_loop again.  */
    1987            4 :           redirect_edge_pred (single_succ_edge (preheader),
    1988              :                               single_pred (preheader));
    1989            4 :           delete_basic_block (preheader);
    1990            4 :           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
    1991            4 :                                    loop_preheader_edge (scalar_loop)->src);
    1992            4 :           preheader = split_edge (loop_preheader_edge (loop));
    1993            4 :           entry_e = single_pred_edge (preheader);
    1994              :         }
    1995              : 
    1996         1587 :       redirect_edge_and_branch_force (entry_e, new_preheader);
    1997         1587 :       flush_pending_stmts (entry_e);
    1998         1587 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
    1999              : 
    2000              : 
    2001              :       /* `vect_set_loop_condition' replaces the condition in the main exit of
    2002              :          loop.  For counted loops, this is the IV counting exit, so in the case
    2003              :          of the prolog loop, we are replacing the old IV counting exit limit of
    2004              :          total loop niters for the new limit of the prolog niters, as desired.
    2005              :          For uncounted loops, we don't have an IV-counting exit to replace, so
    2006              :          we add a dummy exit to be consumed by `vect_set_loop_condition' later
    2007              :          on.  */
    2008         1587 :       if (create_main_e)
    2009              :         {
    2010           31 :           edge to_latch_e = single_pred_edge (new_loop->latch);
    2011           31 :           bool latch_is_false = to_latch_e->flags & EDGE_FALSE_VALUE ? true
    2012              :                                                                      : false;
    2013              : 
    2014              :           /* Add new bb for duplicate exit.  */
    2015           31 :           basic_block bbcond = split_edge (to_latch_e);
    2016           31 :           gimple_stmt_iterator a = gsi_last_bb (bbcond);
    2017              : 
    2018              :           /* Fix flags for the edge leading to the latch.  */
    2019           31 :           to_latch_e = find_edge (bbcond, new_loop->latch);
    2020           31 :           to_latch_e->flags &= ~EDGE_FALLTHRU;
    2021           31 :           to_latch_e->flags |= latch_is_false ? EDGE_FALSE_VALUE
    2022              :                                               : EDGE_TRUE_VALUE;
    2023              : 
    2024              :           /* Build the condition.  */
    2025           31 :           tree cone = build_int_cst (sizetype, 1);
    2026           31 :           tree czero = build_int_cst (sizetype, 0);
    2027           31 :           gcond *cond_copy = gimple_build_cond (NE_EXPR, cone, czero, NULL_TREE,
    2028              :                                                 NULL_TREE);
    2029              : 
    2030           31 :           gsi_insert_after (&a, cond_copy, GSI_NEW_STMT);
    2031              : 
    2032              :           /* Add edge for exiting the loop via new condition.  */
    2033           38 :           edge dup_exit = make_edge (bbcond, new_exit->dest, latch_is_false
    2034              :                                 ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE);
    2035              : 
    2036           31 :           profile_probability probability = profile_probability::even ();
    2037           31 :           to_latch_e->probability = dup_exit->probability = probability;
    2038              : 
    2039           31 :           set_immediate_dominator (CDI_DOMINATORS, dup_exit->src,
    2040              :                                    new_exit->src);
    2041           31 :           new_exit = dup_exit;
    2042           31 :           *new_e = new_exit;
    2043              :         }
    2044              : 
    2045         1587 :       redirect_edge_and_branch_force (new_exit, preheader);
    2046         1587 :       flush_pending_stmts (new_exit);
    2047         1587 :       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
    2048              : 
    2049              :       /* And remove the non-necessary forwarder again.  Keep the other
    2050              :          one so we have a proper pre-header for the loop at the exit edge.  */
    2051         1587 :       redirect_edge_pred (single_succ_edge (new_preheader),
    2052              :                           single_pred (new_preheader));
    2053         1587 :       delete_basic_block (new_preheader);
    2054         1587 :       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
    2055         1587 :                                loop_preheader_edge (new_loop)->src);
    2056              : 
    2057              :       /* Update dominators for multiple exits.  */
    2058         1587 :       if (has_multiple_exits_p || create_main_e)
    2059              :         {
    2060         1103 :           for (edge alt_e : loop_exits)
    2061              :             {
    2062          437 :               if ((alt_e == loop_exit) && !create_main_e)
    2063          191 :                 continue;
    2064          246 :               basic_block old_dom
    2065          246 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    2066          246 :               if (flow_bb_inside_loop_p (loop, old_dom))
    2067              :                 {
    2068          101 :                   auto_vec<basic_block, 8> queue;
    2069          101 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    2070          325 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    2071          224 :                     if (!flow_bb_inside_loop_p (loop, son))
    2072          123 :                       queue.safe_push (son);
    2073          426 :                   for (auto son : queue)
    2074          123 :                     set_immediate_dominator (CDI_DOMINATORS,
    2075              :                                              son, get_bb_copy (old_dom));
    2076          101 :                 }
    2077              :             }
    2078              :         }
    2079              : 
    2080              :       /* When loop_exit != scalar_exit due to if-conversion loop versioning,
    2081              :          the `scalar_exit' now has two incoming edges, one from the if-converted
    2082              :          and one from the peeled prolog loop.  It is therefore dominated by a
    2083              :          common block between these.  Update its dominator accordingly.  */
    2084          222 :       if (create_main_e && loop_exit != scalar_exit)
    2085            0 :         set_immediate_dominator (CDI_DOMINATORS, scalar_exit->dest,
    2086              :                                  recompute_dominator (CDI_DOMINATORS,
    2087              :                                                       scalar_exit->dest));
    2088              :     }
    2089              : 
    2090        34334 :   free (new_bbs);
    2091        34334 :   free (bbs);
    2092              : 
    2093        34334 :   checking_verify_dominators (CDI_DOMINATORS);
    2094              : 
    2095        34334 :   return new_loop;
    2096        34334 : }
    2097              : 
    2098              : 
    2099              : /* Given the condition expression COND, put it as the last statement of
    2100              :    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
    2101              :    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
    2102              :    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
    2103              :    new edge as irreducible if IRREDUCIBLE_P is true.  */
    2104              : 
    2105              : static edge
    2106        50416 : slpeel_add_loop_guard (basic_block guard_bb, tree cond,
    2107              :                        basic_block guard_to, basic_block dom_bb,
    2108              :                        profile_probability probability, bool irreducible_p)
    2109              : {
    2110        50416 :   gimple_stmt_iterator gsi;
    2111        50416 :   edge new_e, enter_e;
    2112        50416 :   gcond *cond_stmt;
    2113        50416 :   gimple_seq gimplify_stmt_list = NULL;
    2114              : 
    2115        50416 :   enter_e = EDGE_SUCC (guard_bb, 0);
    2116        50416 :   enter_e->flags &= ~EDGE_FALLTHRU;
    2117        50416 :   enter_e->flags |= EDGE_FALSE_VALUE;
    2118        50416 :   gsi = gsi_last_bb (guard_bb);
    2119              : 
    2120        50416 :   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
    2121              :                                  is_gimple_condexpr_for_cond, NULL_TREE);
    2122        50416 :   if (gimplify_stmt_list)
    2123        22775 :     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
    2124              : 
    2125        50416 :   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
    2126        50416 :   gsi = gsi_last_bb (guard_bb);
    2127        50416 :   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
    2128              : 
    2129              :   /* Add new edge to connect guard block to the merge/loop-exit block.  */
    2130        50416 :   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
    2131              : 
    2132        50416 :   new_e->probability = probability;
    2133        50416 :   if (irreducible_p)
    2134           16 :     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
    2135              : 
    2136        50416 :   enter_e->probability = probability.invert ();
    2137        50416 :   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
    2138              : 
    2139              :   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
    2140        50416 :   if (enter_e->dest->loop_father->header == enter_e->dest)
    2141          474 :     split_edge (enter_e);
    2142              : 
    2143        50416 :   return new_e;
    2144              : }
    2145              : 
    2146              : 
    2147              : /* This function verifies that the following restrictions apply to LOOP:
    2148              :    (1) it consists of exactly 2 basic blocks - header, and an empty latch
    2149              :        for innermost loop and 5 basic blocks for outer-loop.
    2150              :    (2) it is single entry, single exit
    2151              :    (3) its exit condition is the last stmt in the header
    2152              :    (4) E is the entry/exit edge of LOOP.
    2153              :  */
    2154              : 
    2155              : bool
    2156       496391 : slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
    2157              :                              const_edge e)
    2158              : {
    2159       496391 :   edge entry_e = loop_preheader_edge (loop);
    2160       496391 :   gcond *orig_cond = get_loop_exit_condition (exit_e);
    2161       496391 :   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
    2162              : 
    2163              :   /* All loops have an outer scope; the only case loop->outer is NULL is for
    2164              :      the function itself.  */
    2165       496391 :   if (!loop_outer (loop)
    2166       496391 :       || !empty_block_p (loop->latch)
    2167              :       || !exit_e
    2168              :       /* Verify that new loop exit condition can be trivially modified.  */
    2169       496391 :       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
    2170       992782 :       || (e != exit_e && e != entry_e))
    2171            0 :     return false;
    2172              : 
    2173       496391 :   basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
    2174       496391 :   get_loop_body_with_size (loop, bbs, loop->num_nodes);
    2175       496391 :   bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
    2176       496391 :   free (bbs);
    2177       496391 :   return ret;
    2178              : }
    2179              : 
    2180              : /* Function find_loop_location.
    2181              : 
    2182              :    Extract the location of the loop in the source code.
    2183              :    If the loop is not well formed for vectorization, an estimated
    2184              :    location is calculated.
    2185              :    Return the loop location if succeed and NULL if not.  */
    2186              : 
    2187              : dump_user_location_t
    2188      3558046 : find_loop_location (class loop *loop)
    2189              : {
    2190      3558046 :   gimple *stmt = NULL;
    2191      3558046 :   basic_block bb;
    2192      3558046 :   gimple_stmt_iterator si;
    2193              : 
    2194      3558046 :   if (!loop)
    2195            0 :     return dump_user_location_t ();
    2196              : 
    2197              :   /* For the root of the loop tree return the function location.  */
    2198      3558046 :   if (!loop_outer (loop))
    2199            0 :     return dump_user_location_t::from_function_decl (cfun->decl);
    2200              : 
    2201      3558046 :   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
    2202              :     {
    2203              :       /* We only care about the loop location, so use any exit with location
    2204              :          information.  */
    2205     11163019 :       for (edge e : get_loop_exit_edges (loop))
    2206              :         {
    2207      3653472 :           stmt = get_loop_exit_condition (e);
    2208              : 
    2209      3653472 :           if (stmt
    2210      3653472 :               && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2211      3105207 :             return stmt;
    2212      3558046 :         }
    2213              :     }
    2214              : 
    2215              :   /* If we got here the loop is probably not "well formed",
    2216              :      try to estimate the loop location */
    2217              : 
    2218       452839 :   if (!loop->header)
    2219            0 :     return dump_user_location_t ();
    2220              : 
    2221       452839 :   bb = loop->header;
    2222              : 
    2223      1600246 :   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    2224              :     {
    2225      1028292 :       stmt = gsi_stmt (si);
    2226      1028292 :       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    2227       333724 :         return stmt;
    2228              :     }
    2229              : 
    2230       119115 :   return dump_user_location_t ();
    2231              : }
    2232              : 
    2233              : /* Return true if the phi described by STMT_INFO defines an IV of the
    2234              :    loop to be vectorized.  */
    2235              : 
    2236              : static bool
    2237      1330814 : iv_phi_p (stmt_vec_info stmt_info)
    2238              : {
    2239      1330814 :   gphi *phi = as_a <gphi *> (stmt_info->stmt);
    2240      2661628 :   if (virtual_operand_p (PHI_RESULT (phi)))
    2241              :     return false;
    2242              : 
    2243      1053811 :   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
    2244      1053811 :       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    2245       156698 :     return false;
    2246              : 
    2247              :   return true;
    2248              : }
    2249              : 
    2250              : /* Return true if vectorizer can peel for nonlinear iv.  */
    2251              : static bool
    2252         7772 : vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
    2253              :                               stmt_vec_info stmt_info)
    2254              : {
    2255         7772 :   enum vect_induction_op_type induction_type
    2256              :     = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
    2257         7772 :   tree niters_skip;
    2258              :   /* Init_expr will be update by vect_update_ivs_after_vectorizer,
    2259              :      if niters or vf is unkown:
    2260              :      For shift, when shift mount >= precision, there would be UD.
    2261              :      For mult, don't known how to generate
    2262              :      init_expr * pow (step, niters) for variable niters.
    2263              :      For neg unknown niters are ok, since niters of vectorized main loop
    2264              :      will always be multiple of 2.
    2265              :      See also PR113163,  PR114196 and PR114485.  */
    2266         7772 :   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    2267         7772 :       || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    2268         7772 :       || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    2269         3102 :           && induction_type != vect_step_op_neg))
    2270              :     {
    2271         2918 :       if (dump_enabled_p ())
    2272           12 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2273              :                          "Peeling for epilogue is not supported"
    2274              :                          " for this nonlinear induction"
    2275              :                          " when iteration count is unknown or"
    2276              :                          " when using partial vectorization.\n");
    2277         2918 :       return false;
    2278              :     }
    2279              : 
    2280         4854 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    2281          266 :       && induction_type == vect_step_op_mul)
    2282              :     {
    2283           24 :       if (dump_enabled_p ())
    2284            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2285              :                          "Peeling for is not supported for nonlinear mult"
    2286              :                          " induction using partial vectorization.\n");
    2287           24 :       return false;
    2288              :     }
    2289              : 
    2290              :   /* Avoid compile time hog on vect_peel_nonlinear_iv_init.  */
    2291         4588 :   if (induction_type == vect_step_op_mul)
    2292              :     {
    2293          409 :       tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
    2294          409 :       tree type = TREE_TYPE (step_expr);
    2295              : 
    2296          812 :       if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
    2297          409 :           && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
    2298              :         {
    2299            6 :           if (dump_enabled_p ())
    2300            6 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2301              :                              "Avoid compile time hog on"
    2302              :                              " vect_peel_nonlinear_iv_init"
    2303              :                              " for nonlinear induction vec_step_op_mul"
    2304              :                              " when iteration count is too big.\n");
    2305            6 :           return false;
    2306              :         }
    2307              :     }
    2308              : 
    2309              :   /* Also doens't support peel for neg when niter is variable.
    2310              :      ??? generate something like niter_expr & 1 ? init_expr : -init_expr?  */
    2311         4824 :   niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
    2312         4824 :   if ((niters_skip != NULL_TREE
    2313            0 :        && (TREE_CODE (niters_skip) != INTEGER_CST
    2314            0 :            || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
    2315         4824 :       || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
    2316         4824 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
    2317              :     {
    2318            4 :       if (dump_enabled_p ())
    2319            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2320              :                          "Peeling for alignement is not supported"
    2321              :                          " for nonlinear induction when niters_skip"
    2322              :                          " is not constant.\n");
    2323            4 :       return false;
    2324              :     }
    2325              : 
    2326              :   return true;
    2327              : }
    2328              : 
    2329              : /* Function vect_can_advance_ivs_p
    2330              : 
    2331              :    In case the number of iterations that LOOP iterates is unknown at compile
    2332              :    time, an epilog loop will be generated, and the loop induction variables
    2333              :    (IVs) will be "advanced" to the value they are supposed to take just before
    2334              :    the epilog loop.  Here we check that the access function of the loop IVs
    2335              :    and the expression that represents the loop bound are simple enough.
    2336              :    These restrictions will be relaxed in the future.  */
    2337              : 
    2338              : bool
    2339       499885 : vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
    2340              : {
    2341       499885 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2342       499885 :   basic_block bb = loop->header;
    2343       499885 :   gphi_iterator gsi;
    2344              : 
    2345              :   /* Analyze phi functions of the loop header.  */
    2346              : 
    2347       499885 :   if (dump_enabled_p ())
    2348        25170 :     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
    2349      1733175 :   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2350              :     {
    2351      1237255 :       tree evolution_part;
    2352      1237255 :       enum vect_induction_op_type induction_type;
    2353              : 
    2354      1237255 :       gphi *phi = gsi.phi ();
    2355      1237255 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    2356      1237255 :       if (dump_enabled_p ())
    2357        72199 :         dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
    2358              :                          phi_info->stmt);
    2359              : 
    2360              :       /* Skip virtual phi's. The data dependences that are associated with
    2361              :          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
    2362              : 
    2363              :          Skip reduction phis.  */
    2364      1237255 :       if (!iv_phi_p (phi_info))
    2365              :         {
    2366       390875 :           if (dump_enabled_p ())
    2367        25649 :             dump_printf_loc (MSG_NOTE, vect_location,
    2368              :                              "reduc or virtual phi. skip.\n");
    2369       390875 :           continue;
    2370              :         }
    2371              : 
    2372       846380 :       induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
    2373       846380 :       if (induction_type != vect_step_op_add)
    2374              :         {
    2375         7772 :           if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
    2376              :             return false;
    2377              : 
    2378         4820 :           continue;
    2379              :         }
    2380              : 
    2381              :       /* Analyze the evolution function.  */
    2382              : 
    2383       838608 :       evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    2384       838608 :       if (evolution_part == NULL_TREE)
    2385              :         {
    2386          981 :           if (dump_enabled_p ())
    2387           78 :             dump_printf (MSG_MISSED_OPTIMIZATION,
    2388              :                          "No access function or evolution.\n");
    2389          981 :           return false;
    2390              :         }
    2391              : 
    2392              :       /* FORNOW: We do not transform initial conditions of IVs
    2393              :          which evolution functions are not invariants in the loop.  */
    2394              : 
    2395       837627 :       if (!expr_invariant_in_loop_p (loop, evolution_part))
    2396              :         {
    2397           32 :           if (dump_enabled_p ())
    2398            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2399              :                              "evolution not invariant in loop.\n");
    2400           32 :           return false;
    2401              :         }
    2402              : 
    2403              :       /* FORNOW: We do not transform initial conditions of IVs
    2404              :          which evolution functions are a polynomial of degree >= 2.  */
    2405              : 
    2406      2070885 :       if (tree_is_chrec (evolution_part))
    2407              :         {
    2408            0 :           if (dump_enabled_p ())
    2409            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2410              :                              "evolution is chrec.\n");
    2411            0 :           return false;
    2412              :         }
    2413              :     }
    2414              : 
    2415              :   return true;
    2416              : }
    2417              : 
    2418              : 
    2419              : /*   Function vect_update_ivs_after_vectorizer.
    2420              : 
    2421              :      "Advance" the induction variables of LOOP to the value they should take
    2422              :      after the execution of LOOP.  This is currently necessary because the
    2423              :      vectorizer does not handle induction variables that are used after the
    2424              :      loop.  Such a situation occurs when the last iterations of LOOP are
    2425              :      peeled, because:
    2426              :      1. We introduced new uses after LOOP for IVs that were not originally used
    2427              :         after LOOP: the IVs of LOOP are now used by an epilog loop.
    2428              :      2. LOOP is going to be vectorized; this means that it will iterate N/VF
    2429              :         times, whereas the loop IVs should be bumped N times.
    2430              : 
    2431              :      Input:
    2432              :      - LOOP - a loop that is going to be vectorized. The last few iterations
    2433              :               of LOOP were peeled.
    2434              :      - NITERS - the number of iterations that LOOP executes (before it is
    2435              :                 vectorized). i.e, the number of times the ivs should be bumped.
    2436              :      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
    2437              :                   coming out from LOOP on which there are uses of the LOOP ivs
    2438              :                   (this is the path from LOOP->exit to epilog_loop->preheader).
    2439              : 
    2440              :                   The new definitions of the ivs are placed in LOOP->exit.
    2441              :                   The phi args associated with the edge UPDATE_E in the bb
    2442              :                   UPDATE_E->dest are updated accordingly.
    2443              : 
    2444              :      - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
    2445              :                       the main latch exit.
    2446              : 
    2447              :      Assumption 1: Like the rest of the vectorizer, this function assumes
    2448              :      a single loop exit that has a single predecessor.
    2449              : 
    2450              :      Assumption 2: The phi nodes in the LOOP header and in update_bb are
    2451              :      organized in the same order.
    2452              : 
    2453              :      Assumption 3: The access function of the ivs is simple enough (see
    2454              :      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
    2455              : 
    2456              :      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
    2457              :      coming out of LOOP on which the ivs of LOOP are used (this is the path
    2458              :      that leads to the epilog loop; other paths skip the epilog loop).  This
    2459              :      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
    2460              :      needs to have its phis updated.
    2461              :  */
    2462              : 
    2463              : static void
    2464        33362 : vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
    2465              :                                   tree niters, edge update_e,
    2466              :                                   bool early_exit_p)
    2467              : {
    2468        33362 :   gphi_iterator gsi, gsi1;
    2469        33362 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2470        33362 :   basic_block update_bb = update_e->dest;
    2471        33362 :   basic_block exit_bb = update_e->src;
    2472              :   /* Check to see if this is an empty loop pre-header block.  If it exists
    2473              :      we need to use the edge from that block -> loop header for updates but
    2474              :      must use the original exit_bb to add any new adjustment because there
    2475              :      can be a skip_epilog edge bypassing the epilog and so the loop pre-header
    2476              :      too.  */
    2477        33437 :   if (empty_block_p (update_bb) && single_succ_p (update_bb))
    2478              :     {
    2479           75 :       update_e = single_succ_edge (update_bb);
    2480           75 :       update_bb = update_e->dest;
    2481              :     }
    2482        33362 :   gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
    2483              : 
    2484        33362 :   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
    2485       126921 :        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
    2486        93559 :        gsi_next (&gsi), gsi_next (&gsi1))
    2487              :     {
    2488        93559 :       tree init_expr;
    2489        93559 :       tree step_expr, off;
    2490        93559 :       tree type;
    2491        93559 :       tree var, ni, ni_name;
    2492              : 
    2493        93559 :       gphi *phi = gsi.phi ();
    2494        93559 :       gphi *phi1 = gsi1.phi ();
    2495        93559 :       stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
    2496        93559 :       if (dump_enabled_p ())
    2497        12237 :         dump_printf_loc (MSG_NOTE, vect_location,
    2498              :                          "vect_update_ivs_after_vectorizer: phi: %G",
    2499              :                          (gimple *) phi);
    2500              : 
    2501              :       /* Skip reduction and virtual phis.  */
    2502        93559 :       if (!iv_phi_p (phi_info))
    2503              :         {
    2504        42826 :           if (dump_enabled_p ())
    2505         4762 :             dump_printf_loc (MSG_NOTE, vect_location,
    2506              :                              "reduc or virtual phi. skip.\n");
    2507        42826 :           continue;
    2508              :         }
    2509              : 
    2510        50733 :       type = TREE_TYPE (gimple_phi_result (phi));
    2511        50733 :       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
    2512        50733 :       step_expr = unshare_expr (step_expr);
    2513              : 
    2514              :       /* FORNOW: We do not support IVs whose evolution function is a polynomial
    2515              :          of degree >= 2 or exponential.  */
    2516        50733 :       gcc_assert (!tree_is_chrec (step_expr));
    2517              : 
    2518        50733 :       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
    2519        50733 :       gimple_seq stmts = NULL;
    2520        50733 :       enum vect_induction_op_type induction_type
    2521              :         = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
    2522              : 
    2523        50733 :       if (induction_type == vect_step_op_add)
    2524              :         {
    2525        50605 :           tree stype = TREE_TYPE (step_expr);
    2526        50605 :           off = fold_build2 (MULT_EXPR, stype,
    2527              :                                fold_convert (stype, niters), step_expr);
    2528              : 
    2529        50605 :           if (POINTER_TYPE_P (type))
    2530         3552 :             ni = fold_build_pointer_plus (init_expr, off);
    2531              :           else
    2532        47053 :             ni = fold_convert (type,
    2533              :                                fold_build2 (PLUS_EXPR, stype,
    2534              :                                             fold_convert (stype, init_expr),
    2535              :                                             off));
    2536              :         }
    2537              :       /* Don't bother call vect_peel_nonlinear_iv_init.  */
    2538          128 :       else if (induction_type == vect_step_op_neg)
    2539              :         ni = init_expr;
    2540              :       else
    2541           84 :         ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
    2542              :                                           niters, step_expr,
    2543              :                                           induction_type, early_exit_p);
    2544              : 
    2545        50733 :       var = create_tmp_var (type, "tmp");
    2546              : 
    2547        50733 :       gimple_seq new_stmts = NULL;
    2548        50733 :       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
    2549              : 
    2550              :       /* Exit_bb shouldn't be empty, but we also can't insert after a ctrl
    2551              :          statements.  */
    2552        50733 :       if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
    2553              :         {
    2554          248 :           gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
    2555          248 :           gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
    2556              :         }
    2557              :       else
    2558              :         {
    2559        50485 :           gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
    2560        50485 :           gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
    2561              :         }
    2562              : 
    2563              :       /* Update the PHI argument on the requested edge.  */
    2564        50733 :       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    2565              :     }
    2566        33362 : }
    2567              : 
    2568              : /* Return a gimple value containing the misalignment (measured in vector
    2569              :    elements) for the loop described by LOOP_VINFO, i.e. how many elements
    2570              :    it is away from a perfectly aligned address.  Add any new statements
    2571              :    to SEQ.  */
    2572              : 
    2573              : static tree
    2574          202 : get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
    2575              : {
    2576          202 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    2577          202 :   stmt_vec_info stmt_info = dr_info->stmt;
    2578          202 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2579              : 
    2580          202 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    2581          202 :   unsigned HOST_WIDE_INT target_align_c;
    2582          202 :   tree target_align_minus_1;
    2583              : 
    2584          202 :   bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    2585          202 :                                         size_zero_node) < 0;
    2586          202 :   tree offset = (negative
    2587          202 :                  ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    2588              :                              * TREE_INT_CST_LOW
    2589              :                                  (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
    2590          199 :                  : size_zero_node);
    2591          202 :   tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
    2592              :                                                           stmt_info, seq,
    2593              :                                                           offset);
    2594          202 :   tree type = unsigned_type_for (TREE_TYPE (start_addr));
    2595          202 :   if (target_align.is_constant (&target_align_c))
    2596          202 :     target_align_minus_1 = build_int_cst (type, target_align_c - 1);
    2597              :   else
    2598              :     {
    2599              :       tree vla = build_int_cst (type, target_align);
    2600              :       target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
    2601              :                                           build_int_cst (type, 1));
    2602              :     }
    2603              : 
    2604          202 :   HOST_WIDE_INT elem_size
    2605          202 :     = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    2606          404 :   tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
    2607              : 
    2608              :   /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
    2609          202 :   tree int_start_addr = fold_convert (type, start_addr);
    2610          202 :   tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
    2611              :                                         target_align_minus_1);
    2612              : 
    2613              :   /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
    2614          202 :   tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
    2615              :                                         elem_size_log);
    2616              : 
    2617          202 :   return misalign_in_elems;
    2618              : }
    2619              : 
    2620              : /* Function vect_gen_prolog_loop_niters
    2621              : 
    2622              :    Generate the number of iterations which should be peeled as prolog for the
    2623              :    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
    2624              :    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
    2625              :    As a result, after the execution of this loop, the data reference DR will
    2626              :    refer to an aligned location.  The following computation is generated:
    2627              : 
    2628              :    If the misalignment of DR is known at compile time:
    2629              :      addr_mis = int mis = DR_MISALIGNMENT (dr);
    2630              :    Else, compute address misalignment in bytes:
    2631              :      addr_mis = addr & (target_align - 1)
    2632              : 
    2633              :    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
    2634              : 
    2635              :    (elem_size = element type size; an element is the scalar element whose type
    2636              :    is the inner type of the vectype)
    2637              : 
    2638              :    The computations will be emitted at the end of BB.  We also compute and
    2639              :    store upper bound (included) of the result in BOUND.
    2640              : 
    2641              :    When the step of the data-ref in the loop is not 1 (as in interleaved data
    2642              :    and SLP), the number of iterations of the prolog must be divided by the step
    2643              :    (which is equal to the size of interleaved group).
    2644              : 
    2645              :    The above formulas assume that VF == number of elements in the vector. This
    2646              :    may not hold when there are multiple-types in the loop.
    2647              :    In this case, for some data-references in the loop the VF does not represent
    2648              :    the number of elements that fit in the vector.  Therefore, instead of VF we
    2649              :    use TYPE_VECTOR_SUBPARTS.  */
    2650              : 
    2651              : static tree
    2652          428 : vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
    2653              :                              basic_block bb, poly_int64 *bound)
    2654              : {
    2655          428 :   dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
    2656          428 :   tree var;
    2657          428 :   tree niters_type
    2658          428 :     = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) ? sizetype
    2659          428 :                                                  : TREE_TYPE (LOOP_VINFO_NITERS
    2660              :                                                               (loop_vinfo));
    2661          428 :   gimple_seq stmts = NULL, new_stmts = NULL;
    2662          428 :   tree iters, iters_name;
    2663          428 :   stmt_vec_info stmt_info = dr_info->stmt;
    2664          428 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2665          428 :   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
    2666              : 
    2667          428 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    2668              :     {
    2669          226 :       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    2670              : 
    2671          226 :       if (dump_enabled_p ())
    2672          217 :         dump_printf_loc (MSG_NOTE, vect_location,
    2673              :                          "known peeling = %d.\n", npeel);
    2674              : 
    2675          226 :       iters = build_int_cst (niters_type, npeel);
    2676          226 :       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    2677              :     }
    2678              :   else
    2679              :     {
    2680          202 :       tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
    2681          202 :       tree type = TREE_TYPE (misalign_in_elems);
    2682          202 :       HOST_WIDE_INT elem_size
    2683          202 :         = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    2684              :       /* We only do prolog peeling if the target alignment is known at compile
    2685              :          time.  */
    2686          202 :       poly_uint64 align_in_elems =
    2687          202 :         exact_div (target_align, elem_size);
    2688          202 :       tree align_in_elems_minus_1 =
    2689          202 :         build_int_cst (type, align_in_elems - 1);
    2690          202 :       tree align_in_elems_tree = build_int_cst (type, align_in_elems);
    2691              : 
    2692              :       /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
    2693              :                                  & (align_in_elems - 1)).  */
    2694          202 :       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    2695          202 :                                             size_zero_node) < 0;
    2696          202 :       if (negative)
    2697            3 :         iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
    2698              :                              align_in_elems_tree);
    2699              :       else
    2700          199 :         iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
    2701              :                              misalign_in_elems);
    2702          202 :       iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
    2703          202 :       iters = fold_convert (niters_type, iters);
    2704          202 :       *bound = align_in_elems;
    2705              :     }
    2706              : 
    2707          428 :   if (dump_enabled_p ())
    2708          276 :     dump_printf_loc (MSG_NOTE, vect_location,
    2709              :                      "niters for prolog loop: %T\n", iters);
    2710              : 
    2711          428 :   var = create_tmp_var (niters_type, "prolog_loop_niters");
    2712          428 :   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
    2713              : 
    2714          428 :   if (new_stmts)
    2715          202 :     gimple_seq_add_seq (&stmts, new_stmts);
    2716          428 :   if (stmts)
    2717              :     {
    2718          202 :       gcc_assert (single_succ_p (bb));
    2719          202 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
    2720          202 :       if (gsi_end_p (gsi))
    2721           42 :         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    2722              :       else
    2723          160 :         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    2724              :     }
    2725          428 :   return iters_name;
    2726              : }
    2727              : 
    2728              : 
    2729              : /* Function vect_update_init_of_dr
    2730              : 
    2731              :    If CODE is PLUS, the vector loop starts NITERS iterations after the
    2732              :    scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
    2733              :    iterations before the scalar one (using masking to skip inactive
    2734              :    elements).  This function updates the information recorded in DR to
    2735              :    account for the difference.  Specifically, it updates the OFFSET
    2736              :    field of DR_INFO.  */
    2737              : 
    2738              : static void
    2739        24323 : vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
    2740              : {
    2741        24323 :   struct data_reference *dr = dr_info->dr;
    2742        24323 :   tree offset = dr_info->offset;
    2743        24323 :   if (!offset)
    2744        24323 :     offset = build_zero_cst (sizetype);
    2745              : 
    2746        24323 :   niters = fold_build2 (MULT_EXPR, sizetype,
    2747              :                         fold_convert (sizetype, niters),
    2748              :                         fold_convert (sizetype, DR_STEP (dr)));
    2749        24323 :   offset = fold_build2 (code, sizetype,
    2750              :                         fold_convert (sizetype, offset), niters);
    2751        24323 :   dr_info->offset = offset;
    2752        24323 : }
    2753              : 
    2754              : 
    2755              : /* Function vect_update_inits_of_drs
    2756              : 
    2757              :    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    2758              :    CODE and NITERS are as for vect_update_inits_of_dr.  */
    2759              : 
    2760              : void
    2761         7252 : vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
    2762              :                           tree_code code)
    2763              : {
    2764         7252 :   unsigned int i;
    2765         7252 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2766         7252 :   struct data_reference *dr;
    2767              : 
    2768         7252 :   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
    2769              : 
    2770              :   /* Adjust niters to sizetype.  We used to insert the stmts on loop preheader
    2771              :      here, but since we might use these niters to update the epilogues niters
    2772              :      and data references we can't insert them here as this definition might not
    2773              :      always dominate its uses.  */
    2774         7252 :   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    2775         4570 :     niters = fold_convert (sizetype, niters);
    2776              : 
    2777        39131 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2778              :     {
    2779        24747 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2780        24747 :       if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
    2781        24323 :           && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
    2782        24323 :         vect_update_init_of_dr (dr_info, niters, code);
    2783              :     }
    2784         7252 : }
    2785              : 
    2786              : /* For the information recorded in LOOP_VINFO prepare the loop for peeling
    2787              :    by masking.  This involves calculating the number of iterations to
    2788              :    be peeled and then aligning all memory references appropriately.  */
    2789              : 
    2790              : void
    2791            1 : vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
    2792              : {
    2793            1 :   tree misalign_in_elems;
    2794            1 :   tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
    2795              : 
    2796            1 :   gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
    2797              : 
    2798              :   /* From the information recorded in LOOP_VINFO get the number of iterations
    2799              :      that need to be skipped via masking.  */
    2800            1 :   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    2801              :     {
    2802            1 :       poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    2803            1 :                              - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
    2804            1 :       misalign_in_elems = build_int_cst (type, misalign);
    2805              :     }
    2806              :   else
    2807              :     {
    2808            0 :       gimple_seq seq1 = NULL, seq2 = NULL;
    2809            0 :       misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
    2810            0 :       misalign_in_elems = fold_convert (type, misalign_in_elems);
    2811            0 :       misalign_in_elems = force_gimple_operand (misalign_in_elems,
    2812              :                                                 &seq2, true, NULL_TREE);
    2813            0 :       gimple_seq_add_seq (&seq1, seq2);
    2814            0 :       if (seq1)
    2815              :         {
    2816            0 :           edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2817            0 :           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
    2818            0 :           gcc_assert (!new_bb);
    2819              :         }
    2820              :     }
    2821              : 
    2822            1 :   if (dump_enabled_p ())
    2823            1 :     dump_printf_loc (MSG_NOTE, vect_location,
    2824              :                      "misalignment for fully-masked loop: %T\n",
    2825              :                      misalign_in_elems);
    2826              : 
    2827            1 :   LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
    2828              : 
    2829            1 :   vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
    2830            1 : }
    2831              : 
    2832              : /* This function builds ni_name = number of iterations.  Statements
    2833              :    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
    2834              :    it to TRUE if new ssa_var is generated.  */
    2835              : 
    2836              : tree
    2837        61994 : vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
    2838              : {
    2839        61994 :   if (LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
    2840              :     return NULL_TREE;
    2841        61920 :   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
    2842        61920 :   if (TREE_CODE (ni) == INTEGER_CST)
    2843              :     return ni;
    2844              :   else
    2845              :     {
    2846        25849 :       tree ni_name, var;
    2847        25849 :       gimple_seq stmts = NULL;
    2848        25849 :       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2849              : 
    2850        25849 :       var = create_tmp_var (TREE_TYPE (ni), "niters");
    2851        25849 :       ni_name = force_gimple_operand (ni, &stmts, false, var);
    2852        25849 :       if (stmts)
    2853              :         {
    2854        24790 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    2855        24790 :           if (new_var_p != NULL)
    2856          212 :             *new_var_p = true;
    2857              :         }
    2858              : 
    2859        25849 :       return ni_name;
    2860              :     }
    2861              : }
    2862              : 
    2863              : /* Calculate the number of iterations above which vectorized loop will be
    2864              :    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
    2865              :    of prolog loop.  If it's integer const, the integer number is also passed
    2866              :    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (inclusive) of the
    2867              :    number of iterations of the prolog loop.  BOUND_EPILOG is the corresponding
    2868              :    value for the epilog loop.  If CHECK_PROFITABILITY is true, TH is the
    2869              :    threshold below which the scalar (rather than vectorized) loop will be
    2870              :    executed.  This function stores the upper bound (inclusive) of the result
    2871              :    in BOUND_SCALAR.  */
    2872              : 
    2873              : static tree
    2874        24834 : vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
    2875              :                              poly_int64 bound_prolog, poly_int64 bound_epilog,
    2876              :                              int th, poly_uint64 *bound_scalar,
    2877              :                              bool check_profitability)
    2878              : {
    2879        24834 :   tree type = TREE_TYPE (niters_prolog);
    2880        24834 :   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
    2881              :                              build_int_cst (type, bound_epilog));
    2882              : 
    2883        24834 :   *bound_scalar = bound_prolog + bound_epilog;
    2884        24834 :   if (check_profitability)
    2885              :     {
    2886              :       /* TH indicates the minimum niters of vectorized loop, while we
    2887              :          compute the maximum niters of scalar loop.  */
    2888        15919 :       th--;
    2889              :       /* Peeling for constant times.  */
    2890        15919 :       if (int_niters_prolog >= 0)
    2891              :         {
    2892        15881 :           *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
    2893        15881 :           return build_int_cst (type, *bound_scalar);
    2894              :         }
    2895              :       /* Peeling an unknown number of times.  Note that both BOUND_PROLOG
    2896              :          and BOUND_EPILOG are inclusive upper bounds.  */
    2897           38 :       if (known_ge (th, bound_prolog + bound_epilog))
    2898              :         {
    2899            0 :           *bound_scalar = th;
    2900            0 :           return build_int_cst (type, th);
    2901              :         }
    2902              :       /* Need to do runtime comparison.  */
    2903           38 :       else if (maybe_gt (th, bound_epilog))
    2904              :         {
    2905           38 :           *bound_scalar = upper_bound (*bound_scalar, th);
    2906           38 :           return fold_build2 (MAX_EXPR, type,
    2907              :                               build_int_cst (type, th), niters);
    2908              :         }
    2909              :     }
    2910              :   return niters;
    2911              : }
    2912              : 
    2913              : /* NITERS is the number of times that the original scalar loop executes
    2914              :    after peeling.  Work out the maximum number of iterations N that can
    2915              :    be handled by the vectorized form of the loop and then either:
    2916              : 
    2917              :    a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
    2918              : 
    2919              :         niters_vector = N
    2920              : 
    2921              :    b) set *STEP_VECTOR_PTR to one and generate:
    2922              : 
    2923              :         niters_vector = N / vf
    2924              : 
    2925              :    In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
    2926              :    any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
    2927              :    is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).
    2928              : 
    2929              :    Case (a) is used for LOOP_VINFO_USING_PARTIAL_VECTORS_P or if VF is
    2930              :    variable.  As stated above, NITERS_VECTOR then equals the number
    2931              :    of scalar iterations and vect_set_loop_condition will handle the
    2932              :    step.  As opposed to (b) we don't know anything about NITER_VECTOR's
    2933              :    range here.
    2934              : */
    2935              : 
    2936              : void
    2937        33497 : vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
    2938              :                              tree *niters_vector_ptr, tree *step_vector_ptr,
    2939              :                              bool niters_no_overflow)
    2940              : {
    2941        33497 :   tree ni_minus_gap, var;
    2942        33497 :   tree niters_vector, step_vector;
    2943        33497 :   tree type = niters ? TREE_TYPE (niters) : sizetype;
    2944        33497 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2945        33497 :   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
    2946              : 
    2947              :   /* If epilogue loop is required because of data accesses with gaps, we
    2948              :      subtract one iteration from the total number of iterations here for
    2949              :      correct calculation of RATIO.  */
    2950        33497 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    2951              :     {
    2952          372 :       ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
    2953              :                                   build_one_cst (type));
    2954          372 :       if (!is_gimple_val (ni_minus_gap))
    2955              :         {
    2956          170 :           var = create_tmp_var (type, "ni_gap");
    2957          170 :           gimple *stmts = NULL;
    2958          170 :           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
    2959              :                                                true, var);
    2960          170 :           gsi_insert_seq_on_edge_immediate (pe, stmts);
    2961              :         }
    2962              :     }
    2963              :   else
    2964              :     ni_minus_gap = niters;
    2965              : 
    2966              :   /* To silence some unexpected warnings, simply initialize to 0. */
    2967        33497 :   unsigned HOST_WIDE_INT const_vf = 0;
    2968        33497 :   if (vf.is_constant (&const_vf)
    2969        33497 :       && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    2970              :     {
    2971              :       /* Create: niters / vf, which is equivalent to niters >> log2(vf) when
    2972              :                  vf is a power of two, and when not we approximate using a
    2973              :                  truncating division.  */
    2974              :       /* If it's known that niters == number of latch executions + 1 doesn't
    2975              :          overflow, we can generate niters / vf; otherwise we generate
    2976              :          (niters - vf) / vf + 1 by using the fact that we know ratio
    2977              :          will be at least one.  */
    2978        33479 :       tree var_vf = build_int_cst (type, const_vf);
    2979        33479 :       if (niters_no_overflow)
    2980        33258 :         niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
    2981              :                                      var_vf);
    2982              :       else
    2983          221 :         niters_vector
    2984          221 :           = fold_build2 (PLUS_EXPR, type,
    2985              :                          fold_build2 (TRUNC_DIV_EXPR, type,
    2986              :                                       fold_build2 (MINUS_EXPR, type,
    2987              :                                                    ni_minus_gap,
    2988              :                                                    var_vf),
    2989              :                                       var_vf),
    2990              :                          build_int_cst (type, 1));
    2991        33479 :       step_vector = build_one_cst (type);
    2992              :     }
    2993              :   else
    2994              :     {
    2995           18 :       niters_vector = ni_minus_gap;
    2996           18 :       step_vector = build_int_cst (type, vf);
    2997              :     }
    2998              : 
    2999        33497 :   if (!is_gimple_val (niters_vector))
    3000              :     {
    3001        25629 :       var = create_tmp_var (type, "bnd");
    3002        25629 :       gimple_seq stmts = NULL;
    3003        25629 :       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
    3004        25629 :       gsi_insert_seq_on_edge_immediate (pe, stmts);
    3005              :       /* Peeling algorithm guarantees that vector loop bound is at least ONE,
    3006              :          we set range information to make niters analyzer's life easier.
    3007              :          Note the number of latch iteration value can be TYPE_MAX_VALUE so
    3008              :          we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
    3009        25629 :       if (stmts != NULL
    3010        25629 :           && integer_onep (step_vector))
    3011              :         {
    3012        25614 :           if (niters_no_overflow)
    3013              :             {
    3014        25411 :               int_range<1> vr (type,
    3015        50822 :                                wi::one (TYPE_PRECISION (type)),
    3016        50822 :                                wi::div_trunc (wi::max_value
    3017        25411 :                                               (TYPE_PRECISION (type),
    3018        25411 :                                                TYPE_SIGN (type)),
    3019              :                                               const_vf,
    3020        50822 :                                               TYPE_SIGN (type)));
    3021        25411 :               set_range_info (niters_vector, vr);
    3022        25411 :             }
    3023              :           /* For VF == 1 the vector IV might also overflow so we cannot
    3024              :              assert a minimum value of 1.  */
    3025          203 :           else if (const_vf > 1)
    3026              :             {
    3027          167 :               int_range<1> vr (type,
    3028          334 :                                wi::one (TYPE_PRECISION (type)),
    3029          501 :                                wi::rshift (wi::max_value (TYPE_PRECISION (type),
    3030          167 :                                                           TYPE_SIGN (type))
    3031          334 :                                            - (const_vf - 1),
    3032          334 :                                            exact_log2 (const_vf), TYPE_SIGN (type))
    3033          668 :                                + 1);
    3034          167 :               set_range_info (niters_vector, vr);
    3035          167 :             }
    3036              :         }
    3037              :     }
    3038        33497 :   *niters_vector_ptr = niters_vector;
    3039        33497 :   *step_vector_ptr = step_vector;
    3040              : 
    3041        33497 :   return;
    3042              : }
    3043              : 
    3044              : /* Given NITERS_VECTOR which is the number of iterations for vectorized
    3045              :    loop specified by LOOP_VINFO after vectorization, compute the number
    3046              :    of iterations before vectorization (niters_vector * vf) and store it
    3047              :    to NITERS_VECTOR_MULT_VF_PTR.  */
    3048              : 
    3049              : static void
    3050        32704 : vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
    3051              :                                      tree niters_vector,
    3052              :                                      tree *niters_vector_mult_vf_ptr)
    3053              : {
    3054              :   /* We should be using a step_vector of VF if VF is variable.  */
    3055        32704 :   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
    3056        32704 :   tree type = TREE_TYPE (niters_vector);
    3057        32704 :   tree tree_vf = build_int_cst (type, vf);
    3058        32704 :   basic_block exit_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3059              : 
    3060        32704 :   gcc_assert (niters_vector_mult_vf_ptr != NULL);
    3061        32704 :   tree niters_vector_mult_vf = fold_build2 (MULT_EXPR, type,
    3062              :                                             niters_vector, tree_vf);
    3063              : 
    3064              :   /* If we've peeled a vector iteration then subtract one full vector
    3065              :      iteration.  */
    3066        32704 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3067           19 :     niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
    3068              :                                          niters_vector_mult_vf, tree_vf);
    3069              : 
    3070        32704 :   if (!is_gimple_val (niters_vector_mult_vf))
    3071              :     {
    3072        24859 :       tree var = create_tmp_var (type, "niters_vector_mult_vf");
    3073        24859 :       gimple_seq stmts = NULL;
    3074        24859 :       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
    3075              :                                                     &stmts, true, var);
    3076        24859 :       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
    3077        24859 :       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    3078              :     }
    3079        32704 :   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
    3080        32704 : }
    3081              : 
    3082              : /* Function slpeel_add_loop_guard adds guard skipping from the beginning
    3083              :    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
    3084              :    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
    3085              :    appear like below:
    3086              : 
    3087              :        guard_bb:
    3088              :          if (cond)
    3089              :            goto merge_bb;
    3090              :          else
    3091              :            goto skip_loop;
    3092              : 
    3093              :      skip_loop:
    3094              :        header_a:
    3095              :          i_1 = PHI<i_0, i_2>;
    3096              :          ...
    3097              :          i_2 = i_1 + 1;
    3098              :          if (cond_a)
    3099              :            goto latch_a;
    3100              :          else
    3101              :            goto exit_a;
    3102              :        latch_a:
    3103              :          goto header_a;
    3104              : 
    3105              :        exit_a:
    3106              :          i_5 = PHI<i_2>;
    3107              : 
    3108              :        merge_bb:
    3109              :          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
    3110              : 
    3111              :      update_loop:
    3112              :        header_b:
    3113              :          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
    3114              :          ...
    3115              :          i_4 = i_3 + 1;
    3116              :          if (cond_b)
    3117              :            goto latch_b;
    3118              :          else
    3119              :            goto exit_bb;
    3120              :        latch_b:
    3121              :          goto header_b;
    3122              : 
    3123              :        exit_bb:
    3124              : 
    3125              :    This function creates PHI nodes at merge_bb and replaces the use of i_5
    3126              :    in the update_loop's PHI node with the result of new PHI result.  */
    3127              : 
    3128              : static void
    3129        25262 : slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
    3130              :                                     class loop *update_loop,
    3131              :                                     edge guard_edge, edge merge_edge)
    3132              : {
    3133        25262 :   location_t merge_loc, guard_loc;
    3134        25262 :   edge orig_e = loop_preheader_edge (skip_loop);
    3135        25262 :   edge update_e = loop_preheader_edge (update_loop);
    3136        25262 :   gphi_iterator gsi_orig, gsi_update;
    3137              : 
    3138        25262 :   for ((gsi_orig = gsi_start_phis (skip_loop->header),
    3139        25262 :         gsi_update = gsi_start_phis (update_loop->header));
    3140        92405 :        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
    3141        67143 :        gsi_next (&gsi_orig), gsi_next (&gsi_update))
    3142              :     {
    3143        67143 :       gphi *orig_phi = gsi_orig.phi ();
    3144        67143 :       gphi *update_phi = gsi_update.phi ();
    3145              : 
    3146              :       /* Generate new phi node at merge bb of the guard.  */
    3147        67143 :       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    3148        67143 :       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
    3149              : 
    3150              :       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
    3151              :          args in NEW_PHI for these edges.  */
    3152        67143 :       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
    3153        67143 :       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
    3154        67143 :       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
    3155        67143 :       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
    3156        67143 :       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
    3157        67143 :       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
    3158              : 
    3159              :       /* Update phi in UPDATE_PHI.  */
    3160        67143 :       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    3161              :     }
    3162        25262 : }
    3163              : 
    3164              : /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
    3165              :    Return a value that equals:
    3166              : 
    3167              :    - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
    3168              :    - SKIP_VALUE when the main loop is skipped.  */
    3169              : 
    3170              : tree
    3171         3792 : vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
    3172              :                            tree skip_value)
    3173              : {
    3174         3792 :   gcc_assert (loop_vinfo->main_loop_edge);
    3175              : 
    3176         3792 :   tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
    3177         3792 :   basic_block bb = loop_vinfo->main_loop_edge->dest;
    3178         3792 :   gphi *new_phi = create_phi_node (phi_result, bb);
    3179         3792 :   add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
    3180              :                UNKNOWN_LOCATION);
    3181         3792 :   add_phi_arg (new_phi, skip_value,
    3182              :                loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
    3183         3792 :   return phi_result;
    3184              : }
    3185              : 
    3186              : /* Function vect_do_peeling.
    3187              : 
    3188              :    Input:
    3189              :    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
    3190              : 
    3191              :        preheader:
    3192              :      LOOP:
    3193              :        header_bb:
    3194              :          loop_body
    3195              :          if (exit_loop_cond) goto exit_bb
    3196              :          else                goto header_bb
    3197              :        exit_bb:
    3198              : 
    3199              :    - NITERS: The number of iterations of the loop.
    3200              :    - NITERSM1: The number of iterations of the loop's latch.
    3201              :    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
    3202              :    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
    3203              :                               CHECK_PROFITABILITY is true.
    3204              :    Output:
    3205              :    - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
    3206              :      iterate after vectorization; see vect_set_loop_condition for details.
    3207              :    - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
    3208              :      should be set to the number of scalar iterations handled by the
    3209              :      vector loop.  The SSA name is only used on exit from the loop.
    3210              : 
    3211              :    This function peels prolog and epilog from the loop, adds guards skipping
    3212              :    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
    3213              :    would look like:
    3214              : 
    3215              :        guard_bb_1:
    3216              :          if (prefer_scalar_loop) goto merge_bb_1
    3217              :          else                    goto guard_bb_2
    3218              : 
    3219              :        guard_bb_2:
    3220              :          if (skip_prolog) goto merge_bb_2
    3221              :          else             goto prolog_preheader
    3222              : 
    3223              :        prolog_preheader:
    3224              :      PROLOG:
    3225              :        prolog_header_bb:
    3226              :          prolog_body
    3227              :          if (exit_prolog_cond) goto prolog_exit_bb
    3228              :          else                  goto prolog_header_bb
    3229              :        prolog_exit_bb:
    3230              : 
    3231              :        merge_bb_2:
    3232              : 
    3233              :        vector_preheader:
    3234              :      VECTOR LOOP:
    3235              :        vector_header_bb:
    3236              :          vector_body
    3237              :          if (exit_vector_cond) goto vector_exit_bb
    3238              :          else                  goto vector_header_bb
    3239              :        vector_exit_bb:
    3240              : 
    3241              :        guard_bb_3:
    3242              :          if (skip_epilog) goto merge_bb_3
    3243              :          else             goto epilog_preheader
    3244              : 
    3245              :        merge_bb_1:
    3246              : 
    3247              :        epilog_preheader:
    3248              :      EPILOG:
    3249              :        epilog_header_bb:
    3250              :          epilog_body
    3251              :          if (exit_epilog_cond) goto merge_bb_3
    3252              :          else                  goto epilog_header_bb
    3253              : 
    3254              :        merge_bb_3:
    3255              : 
    3256              :    Note this function peels prolog and epilog only if it's necessary,
    3257              :    as well as guards.
    3258              :    This function returns the epilogue loop if a decision was made to vectorize
    3259              :    it, otherwise NULL.
    3260              : 
    3261              :    The analysis resulting in this epilogue loop's loop_vec_info was performed
    3262              :    in the same vect_analyze_loop call as the main loop's.  At that time
    3263              :    vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
    3264              :    vectorization factors than the main loop.  This list is chained in the
    3265              :    loop's loop_vec_info in the 'epilogue_vinfo' member.  When we decide to
    3266              :    vectorize the epilogue loop for a lower vectorization factor, the
    3267              :    loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
    3268              :    This is later used to vectorize the epilogue.
    3269              :    The reason the loop_vec_info needs updating is that it was
    3270              :    constructed based on the original main loop, and the epilogue loop is a
    3271              :    copy of this loop, so all links pointing to statements in the original loop
    3272              :    need updating.  Furthermore, these loop_vec_infos share the
    3273              :    data_reference's records, which will also need to be updated.
    3274              : 
    3275              :    TODO: Guard for prefer_scalar_loop should be emitted along with
    3276              :    versioning conditions if loop versioning is needed.  */
    3277              : 
    3278              : 
    3279              : class loop *
    3280        61566 : vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
    3281              :                  tree *niters_vector, tree *step_vector,
    3282              :                  tree *niters_vector_mult_vf_var, int th,
    3283              :                  bool check_profitability, bool niters_no_overflow,
    3284              :                  tree *advance)
    3285              : {
    3286        61566 :   edge e, guard_e;
    3287        61566 :   tree type = niters ? TREE_TYPE (niters) : sizetype;
    3288        61566 :   tree guard_cond;
    3289        61566 :   basic_block guard_bb, guard_to;
    3290        61566 :   profile_probability prob_prolog, prob_vector, prob_epilog;
    3291        61566 :   int estimated_vf;
    3292        61566 :   int prolog_peeling = 0;
    3293        61566 :   bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
    3294        61566 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    3295              : 
    3296        61566 :   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
    3297        61565 :     prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    3298              : 
    3299        61566 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    3300        61566 :   poly_uint64 bound_epilog = 0;
    3301        61566 :   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
    3302        61548 :       && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
    3303        32385 :     bound_epilog += vf - 1;
    3304        61566 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    3305          372 :     bound_epilog += 1;
    3306              : 
    3307              :   /* For early breaks the scalar loop needs to execute at most VF times
    3308              :      to find the element that caused the break.  */
    3309        61566 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3310         1409 :       && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
    3311        61566 :     bound_epilog = vf;
    3312              : 
    3313        61566 :   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
    3314        61566 :   poly_uint64 bound_scalar = bound_epilog;
    3315              : 
    3316        61566 :   if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ())
    3317         8132 :     dump_printf_loc (MSG_NOTE, vect_location,
    3318              :                      "early break does not require epilog.\n");
    3319              : 
    3320        61566 :   if (!prolog_peeling && !epilog_peeling)
    3321              :     return NULL;
    3322              : 
    3323              :   /* Before doing any peeling make sure to reset debug binds outside of
    3324              :      the loop referring to defs not in LC SSA.  */
    3325        32782 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    3326        99616 :   for (unsigned i = 0; i < loop->num_nodes; ++i)
    3327              :     {
    3328        66834 :       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
    3329        66834 :       imm_use_iterator ui;
    3330        66834 :       gimple *use_stmt;
    3331       159420 :       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
    3332        92586 :            gsi_next (&gsi))
    3333              :         {
    3334       385596 :           FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
    3335       220244 :             if (gimple_debug_bind_p (use_stmt)
    3336        19820 :                 && loop != gimple_bb (use_stmt)->loop_father
    3337        19836 :                 && !flow_loop_nested_p (loop,
    3338           16 :                                         gimple_bb (use_stmt)->loop_father))
    3339              :               {
    3340            2 :                 gimple_debug_bind_reset_value (use_stmt);
    3341            2 :                 update_stmt (use_stmt);
    3342        92586 :               }
    3343              :         }
    3344       624675 :       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    3345       491007 :            gsi_next (&gsi))
    3346              :         {
    3347       491007 :           ssa_op_iter op_iter;
    3348       491007 :           def_operand_p def_p;
    3349       795502 :           FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
    3350      1066326 :             FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
    3351       493601 :               if (gimple_debug_bind_p (use_stmt)
    3352        36265 :                   && loop != gimple_bb (use_stmt)->loop_father
    3353        36292 :                   && !flow_loop_nested_p (loop,
    3354           27 :                                           gimple_bb (use_stmt)->loop_father))
    3355              :                 {
    3356            0 :                   gimple_debug_bind_reset_value (use_stmt);
    3357            0 :                   update_stmt (use_stmt);
    3358       304495 :                 }
    3359              :         }
    3360              :     }
    3361              : 
    3362        32782 :   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
    3363        32782 :   estimated_vf = vect_vf_for_cost (loop_vinfo);
    3364        32782 :   if (estimated_vf == 2)
    3365         6788 :     estimated_vf = 3;
    3366        32782 :   prob_prolog = prob_epilog = profile_probability::guessed_always ()
    3367        32782 :                         .apply_scale (estimated_vf - 1, estimated_vf);
    3368              : 
    3369        32782 :   class loop *prolog = NULL, *epilog = NULL;
    3370        32782 :   class loop *first_loop = loop;
    3371        32782 :   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    3372              : 
    3373              :   /* SSA form needs to be up-to-date since we are going to manually
    3374              :      update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
    3375              :      update SSA state after that, so we have to make sure to not lose any
    3376              :      pending update needs.  */
    3377        32782 :   gcc_assert (!need_ssa_update_p (cfun));
    3378              : 
    3379              :   /* If we're vectorizing an epilogue loop, we have ensured that the
    3380              :      virtual operand is in SSA form throughout the vectorized main loop.
    3381              :      Normally it is possible to trace the updated
    3382              :      vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
    3383              :      back to scalar-stmt vuses, meaning that the effect of the SSA update
    3384              :      remains local to the main loop.  However, there are rare cases in
    3385              :      which the vectorized loop should have vdefs even when the original scalar
    3386              :      loop didn't.  For example, vectorizing a load with IFN_LOAD_LANES
    3387              :      introduces clobbers of the temporary vector array, which in turn
    3388              :      needs new vdefs.  If the scalar loop doesn't write to memory, these
    3389              :      new vdefs will be the only ones in the vector loop.
    3390              :      We are currently defering updating virtual SSA form and creating
    3391              :      of a virtual PHI for this case so we do not have to make sure the
    3392              :      newly introduced virtual def is in LCSSA form.  */
    3393              : 
    3394        32782 :   if (MAY_HAVE_DEBUG_BIND_STMTS)
    3395              :     {
    3396        12311 :       gcc_assert (!adjust_vec.exists ());
    3397        12311 :       adjust_vec.create (32);
    3398              :     }
    3399        32782 :   initialize_original_copy_tables ();
    3400              : 
    3401              :   /* Record the anchor bb at which the guard should be placed if the scalar
    3402              :      loop might be preferred.  */
    3403        32782 :   basic_block anchor = loop_preheader_edge (loop)->src;
    3404              : 
    3405              :   /* Generate the number of iterations for the prolog loop.  We do this here
    3406              :      so that we can also get the upper bound on the number of iterations.  */
    3407        32782 :   tree niters_prolog;
    3408        32782 :   poly_int64 bound_prolog = 0;
    3409        32782 :   if (prolog_peeling)
    3410              :     {
    3411          428 :       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
    3412              :                                                    &bound_prolog);
    3413              :       /* If algonment peeling is known, we will always execute prolog.  */
    3414          428 :       if (TREE_CODE (niters_prolog) == INTEGER_CST)
    3415          226 :         prob_prolog = profile_probability::always ();
    3416              :     }
    3417              :   else
    3418        32354 :     niters_prolog = build_int_cst (type, 0);
    3419              : 
    3420        32782 :   loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
    3421        32782 :   tree niters_vector_mult_vf = NULL_TREE;
    3422              :   /* Saving NITERs before the loop, as this may be changed by prologue.  */
    3423        32782 :   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
    3424        32782 :   edge update_e = NULL, skip_e = NULL;
    3425        32782 :   unsigned int lowest_vf = constant_lower_bound (vf);
    3426              :   /* Prolog loop may be skipped.  */
    3427        32782 :   bool skip_prolog = (prolog_peeling != 0);
    3428              :   /* Skip this loop to epilog when there are not enough iterations to enter this
    3429              :      vectorized loop.  If true we should perform runtime checks on the NITERS
    3430              :      to check whether we should skip the current vectorized loop.  If we know
    3431              :      the number of scalar iterations we may choose to add a runtime check if
    3432              :      this number "maybe" smaller than the number of iterations required
    3433              :      when we know the number of scalar iterations may potentially
    3434              :      be smaller than the number of iterations required to enter this loop, for
    3435              :      this we use the upper bounds on the prolog and epilog peeling.  When we
    3436              :      don't know the number of iterations and don't require versioning it is
    3437              :      because we have asserted that there are enough scalar iterations to enter
    3438              :      the main loop, so this skip is not necessary.  When we are versioning then
    3439              :      we only add such a skip if we have chosen to vectorize the epilogue.  */
    3440        32782 :   bool skip_vector = false;
    3441        32782 :   if (!uncounted_p)
    3442         7890 :     skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3443        40629 :                    ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
    3444         7890 :                                bound_prolog + bound_epilog)
    3445        24874 :                    : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
    3446           40 :                       || vect_epilogues));
    3447              : 
    3448              :   /* Epilog loop must be executed if the number of iterations for epilog
    3449              :      loop is known at compile time, otherwise we need to add a check at
    3450              :      the end of vector loop and skip to the end of epilog loop.  */
    3451        32782 :   bool skip_epilog = (prolog_peeling < 0
    3452        32580 :                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
    3453        32782 :                       || !vf.is_constant ());
    3454              :   /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog
    3455              :      loop must be executed.  */
    3456        32782 :   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
    3457        32410 :       || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3458         1201 :     skip_epilog = false;
    3459              : 
    3460        32782 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3461        32782 :   auto_vec<profile_count> original_counts;
    3462        32782 :   basic_block *original_bbs = NULL;
    3463              : 
    3464        32782 :   if (skip_vector)
    3465              :     {
    3466        24834 :       split_edge (loop_preheader_edge (loop));
    3467              : 
    3468        24834 :       if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
    3469              :         {
    3470        22614 :           original_bbs = get_loop_body (loop);
    3471        68255 :           for (unsigned int i = 0; i < loop->num_nodes; i++)
    3472        45641 :             original_counts.safe_push(original_bbs[i]->count);
    3473              :         }
    3474              : 
    3475              :       /* Due to the order in which we peel prolog and epilog, we first
    3476              :          propagate probability to the whole loop.  The purpose is to
    3477              :          avoid adjusting probabilities of both prolog and vector loops
    3478              :          separately.  Note in this case, the probability of epilog loop
    3479              :          needs to be scaled back later.  */
    3480        24834 :       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
    3481        24834 :       if (prob_vector.initialized_p ())
    3482              :         {
    3483        24834 :           scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
    3484        24834 :           scale_loop_profile (loop, prob_vector, -1);
    3485              :         }
    3486              :     }
    3487              : 
    3488        32782 :   if (vect_epilogues)
    3489              :     {
    3490              :       /* Make sure to set the epilogue's epilogue scalar loop, such that we can
    3491              :          use the original scalar loop as remaining epilogue if necessary.  */
    3492         6823 :       LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
    3493         6823 :         = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    3494         6823 :       LOOP_VINFO_SCALAR_MAIN_EXIT (epilogue_vinfo)
    3495         6823 :         = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3496              :     }
    3497              : 
    3498        32782 :   if (prolog_peeling)
    3499              :     {
    3500          428 :       e = loop_preheader_edge (loop);
    3501          428 :       edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3502          428 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)
    3503              :                            && (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3504              :                                || uncounted_p));
    3505              : 
    3506              :       /* Peel prolog and put it on preheader edge of loop.  */
    3507          428 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3508          428 :       edge prolog_e = NULL;
    3509          428 :       bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
    3510          428 :       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
    3511              :                                                        scalar_loop, scalar_e,
    3512              :                                                        e, &prolog_e, true, NULL,
    3513              :                                                        uncounted_p, uncounted_p,
    3514              :                                                        early_break_peel_p);
    3515              : 
    3516          428 :       gcc_assert (prolog);
    3517          428 :       prolog->force_vectorize = false;
    3518              : 
    3519              :       /* Assign hierarchical discriminators to distinguish prolog loop.  */
    3520          428 :       gimple *prolog_last = last_nondebug_stmt (prolog->header);
    3521          428 :       location_t prolog_loc
    3522          428 :         = prolog_last ? gimple_location (prolog_last) : UNKNOWN_LOCATION;
    3523          428 :       if (prolog_loc != UNKNOWN_LOCATION)
    3524              :         {
    3525          426 :           unsigned int prolog_copyid = allocate_copyid_base (prolog_loc, 1);
    3526          426 :           assign_discriminators_to_loop (prolog, 0, prolog_copyid);
    3527              :         }
    3528          428 :       first_loop = prolog;
    3529          428 :       reset_original_copy_tables ();
    3530              : 
    3531              :       /* Update the number of iterations for prolog loop.  */
    3532          428 :       tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
    3533          428 :       vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
    3534              :                                step_prolog, NULL_TREE, false);
    3535              : 
    3536              :       /* Skip the prolog loop.  */
    3537          428 :       if (skip_prolog)
    3538              :         {
    3539          428 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3540              :                                     niters_prolog, build_int_cst (type, 0));
    3541          428 :           guard_bb = loop_preheader_edge (prolog)->src;
    3542          428 :           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
    3543          428 :           guard_to = split_edge (loop_preheader_edge (loop));
    3544          428 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3545              :                                            guard_to, guard_bb,
    3546              :                                            prob_prolog.invert (),
    3547              :                                            irred_flag);
    3548         1958 :           for (edge alt_e : get_loop_exit_edges (prolog))
    3549              :             {
    3550          674 :               if (alt_e == prolog_e)
    3551          428 :                 continue;
    3552          246 :               basic_block old_dom
    3553          246 :                 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
    3554          246 :               if (flow_bb_inside_loop_p (prolog, old_dom))
    3555              :                 {
    3556          101 :                   auto_vec<basic_block, 8> queue;
    3557          101 :                   for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
    3558          325 :                        son; son = next_dom_son (CDI_DOMINATORS, son))
    3559          224 :                     if (!flow_bb_inside_loop_p (prolog, son))
    3560          123 :                       queue.safe_push (son);
    3561          426 :                   for (auto son : queue)
    3562          123 :                     set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
    3563          101 :                 }
    3564          428 :             }
    3565              : 
    3566          428 :           e = EDGE_PRED (guard_to, 0);
    3567          428 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3568          428 :           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
    3569              : 
    3570          428 :           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
    3571          428 :           scale_loop_profile (prolog, prob_prolog,
    3572          428 :                               estimated_poly_value (bound_prolog) - 1);
    3573              :         }
    3574              : 
    3575              :       /* Update init address of DRs.  */
    3576          428 :       vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
    3577          428 :       if (!uncounted_p)
    3578              :         {
    3579              :           /* Update niters for vector loop.  */
    3580          397 :           LOOP_VINFO_NITERS (loop_vinfo)
    3581          397 :             = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
    3582          397 :           LOOP_VINFO_NITERSM1 (loop_vinfo)
    3583          397 :             = fold_build2 (MINUS_EXPR, type,
    3584              :                            LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
    3585              :         }
    3586          428 :       bool new_var_p = false;
    3587          428 :       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
    3588              :       /* It's guaranteed that vector loop bound before vectorization is at
    3589              :          least VF, so set range information for newly generated var.  */
    3590          428 :       if (new_var_p)
    3591              :         {
    3592          212 :           int_range<1> vr (type,
    3593          424 :                            wi::to_wide (build_int_cst (type, lowest_vf)),
    3594          424 :                            wi::to_wide (TYPE_MAX_VALUE (type)));
    3595          212 :           set_range_info (niters, vr);
    3596          212 :         }
    3597              : 
    3598              :       /* Prolog iterates at most bound_prolog times, latch iterates at
    3599              :          most bound_prolog - 1 times.  */
    3600          428 :       if (bound_prolog.is_constant ())
    3601          428 :         record_niter_bound (prolog, bound_prolog.to_constant () - 1, false,
    3602              :                             true);
    3603          428 :       delete_update_ssa ();
    3604          428 :       adjust_vec_debug_stmts ();
    3605          428 :       scev_reset ();
    3606              :     }
    3607        32782 :   basic_block bb_before_epilog = NULL;
    3608              : 
    3609        32782 :   if (epilog_peeling)
    3610              :     {
    3611        32747 :       e = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3612        32747 :       gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
    3613              : 
    3614              :       /* Peel epilog and put it on exit edge of loop.  If we are vectorizing
    3615              :          said epilog then we should use a copy of the main loop as a starting
    3616              :          point.  This loop may have already had some preliminary transformations
    3617              :          to allow for more optimal vectorization, for example if-conversion.
    3618              :          If we are not vectorizing the epilog then we should use the scalar loop
    3619              :          as the transformations mentioned above make less or no sense when not
    3620              :          vectorizing.  */
    3621        32747 :       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
    3622        32747 :       epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
    3623         6823 :       edge epilog_e = vect_epilogues ? e : scalar_e;
    3624        32747 :       edge new_epilog_e = NULL;
    3625        32747 :       auto_vec<basic_block> doms;
    3626        32747 :       bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
    3627        32747 :       epilog
    3628        32747 :         = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
    3629              :                                                   &new_epilog_e, true, &doms,
    3630              :                                                   uncounted_p, false,
    3631              :                                                   early_break_peel_p);
    3632              : 
    3633        32747 :       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
    3634        32747 :       gcc_assert (epilog);
    3635        32747 :       gcc_assert (new_epilog_e);
    3636        32747 :       epilog->force_vectorize = false;
    3637        32747 :       bb_before_epilog = loop_preheader_edge (epilog)->src;
    3638              : 
    3639              :       /* Assign hierarchical discriminators to distinguish epilog loop.
    3640              :          Only assign if it's a scalar epilog.  If it will be vectorized
    3641              :          (vect_epilogues), discriminators will be assigned.
    3642              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    3643        32747 :       if (!vect_epilogues)
    3644              :         {
    3645        25924 :           gimple *epilog_last = last_nondebug_stmt (epilog->header);
    3646        25924 :           location_t epilog_loc
    3647        25924 :             = epilog_last ? gimple_location (epilog_last) : UNKNOWN_LOCATION;
    3648        25895 :           if (epilog_loc != UNKNOWN_LOCATION)
    3649              :             {
    3650        21092 :               unsigned int epilog_copyid = allocate_copyid_base (epilog_loc, 1);
    3651        21092 :               assign_discriminators_to_loop (epilog, 0, epilog_copyid);
    3652              :             }
    3653              :         }
    3654              : 
    3655              :       /* Scalar version loop may be preferred.  In this case, add guard
    3656              :          and skip to epilog.  Note this only happens when the number of
    3657              :          iterations of loop is unknown at compile time, otherwise this
    3658              :          won't be vectorized.  */
    3659        32747 :       if (skip_vector)
    3660              :         {
    3661              :           /* Additional epilogue iteration is peeled if gap exists.  */
    3662        24834 :           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
    3663        24834 :                                                 bound_prolog, bound_epilog,
    3664              :                                                 th, &bound_scalar,
    3665              :                                                 check_profitability);
    3666              :           /* Build guard against NITERSM1 since NITERS may overflow.  */
    3667        24834 :           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
    3668        24834 :           guard_bb = anchor;
    3669        24834 :           guard_to = split_edge (loop_preheader_edge (epilog));
    3670        24834 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
    3671              :                                            guard_to, guard_bb,
    3672              :                                            prob_vector.invert (),
    3673              :                                            irred_flag);
    3674        24834 :           skip_e = guard_e;
    3675        24834 :           e = EDGE_PRED (guard_to, 0);
    3676        24834 :           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
    3677              : 
    3678              :           /* Handle any remaining dominator updates needed after
    3679              :              inserting the loop skip edge above.  */
    3680        24834 :           if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3681          276 :               && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
    3682          138 :               && prolog_peeling)
    3683              :             {
    3684              :               /* Adding a skip edge to skip a loop with multiple exits
    3685              :                  means the dominator of the join blocks for all exits shifts
    3686              :                  from the prolog skip guard to the loop skip guard.  */
    3687           27 :               auto prolog_skip_bb
    3688           27 :                 = single_pred (loop_preheader_edge (prolog)->src);
    3689           27 :               auto needs_update
    3690           27 :                 = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb);
    3691              : 
    3692              :               /* Update everything except for the immediate children of
    3693              :                  the prolog skip block (the prolog and vector preheaders).
    3694              :                  Those should remain dominated by the prolog skip block itself,
    3695              :                  since the loop guard edge goes to the epilogue.  */
    3696          170 :               for (auto bb : needs_update)
    3697           89 :                 if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest
    3698           89 :                     && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest)
    3699           35 :                   set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb);
    3700           27 :             }
    3701              : 
    3702        24834 :           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
    3703              : 
    3704              :           /* Simply propagate profile info from guard_bb to guard_to which is
    3705              :              a merge point of control flow.  */
    3706        24834 :           profile_count old_count = guard_to->count;
    3707        24834 :           guard_to->count = guard_bb->count;
    3708              : 
    3709              :           /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
    3710        24834 :           if (vect_epilogues || scalar_loop == NULL)
    3711              :             {
    3712        22614 :               gcc_assert(epilog->num_nodes == loop->num_nodes);
    3713        22614 :               basic_block *bbs = get_loop_body (epilog);
    3714        68255 :               for (unsigned int i = 0; i < epilog->num_nodes; i++)
    3715              :                 {
    3716        45641 :                   gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]);
    3717        45641 :                   bbs[i]->count = original_counts[i];
    3718              :                 }
    3719        22614 :               free (bbs);
    3720        22614 :               free (original_bbs);
    3721              :             }
    3722         2220 :           else if (old_count.nonzero_p ())
    3723         2220 :             scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
    3724              : 
    3725              :           /* Only need to handle basic block before epilog loop if it's not
    3726              :              the guard_bb, which is the case when skip_vector is true.  */
    3727        24834 :           if (guard_bb != bb_before_epilog && single_pred_p (bb_before_epilog))
    3728        24696 :             bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
    3729        24834 :           bb_before_epilog = loop_preheader_edge (epilog)->src;
    3730              :         }
    3731              : 
    3732        32747 :       if (!uncounted_p)
    3733              :         {
    3734              :           /* If loop is peeled for non-zero constant times, now niters refers to
    3735              :              orig_niters - prolog_peeling, it won't overflow even the
    3736              :              orig_niters overflows.  */
    3737        32704 :           niters_no_overflow |= (prolog_peeling > 0);
    3738        32704 :           vect_gen_vector_loop_niters (loop_vinfo, niters,
    3739              :                                        niters_vector, step_vector,
    3740              :                                        niters_no_overflow);
    3741        32704 :           if (!integer_onep (*step_vector))
    3742              :             {
    3743              :               /* On exit from the loop we will have an easy way of calcalating
    3744              :                  NITERS_VECTOR / STEP * STEP.  Install a dummy definition
    3745              :                  until then.  */
    3746            0 :               niters_vector_mult_vf
    3747            0 :                 = make_ssa_name (TREE_TYPE (*niters_vector));
    3748            0 :               SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
    3749            0 :               *niters_vector_mult_vf_var = niters_vector_mult_vf;
    3750              :             }
    3751              :           else
    3752        32704 :             vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
    3753              :                                                  &niters_vector_mult_vf);
    3754              :           /* Update IVs of original loop as if they were advanced by
    3755              :              niters_vector_mult_vf steps.  */
    3756        32704 :           gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
    3757        32704 :           update_e = skip_vector ? e : loop_preheader_edge (epilog);
    3758              :         }
    3759        32747 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3760          825 :         update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest);
    3761              : 
    3762              :       /* If we have a peeled vector iteration we will never skip the epilog loop
    3763              :          and we can simplify the cfg a lot by not doing the edge split.  */
    3764        32747 :       if (skip_epilog
    3765        32747 :           || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    3766          825 :               && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)))
    3767              :         {
    3768        25154 :           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
    3769              :                                     niters, niters_vector_mult_vf);
    3770              : 
    3771        25154 :           guard_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;
    3772        25154 :           edge epilog_e = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3773        25154 :           guard_to = epilog_e->dest;
    3774        25654 :           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
    3775              :                                            skip_vector ? anchor : guard_bb,
    3776              :                                            prob_epilog.invert (),
    3777              :                                            irred_flag);
    3778              : 
    3779        25154 :           doms.safe_push (guard_to);
    3780        25154 :           if (vect_epilogues)
    3781         5473 :             epilogue_vinfo->skip_this_loop_edge = guard_e;
    3782        25154 :           edge main_iv = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    3783        25154 :           gphi_iterator gsi2 = gsi_start_phis (main_iv->dest);
    3784        25154 :           for (gphi_iterator gsi = gsi_start_phis (guard_to);
    3785        61489 :                !gsi_end_p (gsi); gsi_next (&gsi))
    3786              :             {
    3787              :               /* We are expecting all of the PHIs we have on epilog_e
    3788              :                  to be also on the main loop exit.  But sometimes
    3789              :                  a stray virtual definition can appear at epilog_e
    3790              :                  which we can then take as the same on all exits,
    3791              :                  we've removed the LC SSA PHI on the main exit before
    3792              :                  so we wouldn't need to create a loop PHI for it.  */
    3793        36335 :               if (virtual_operand_p (gimple_phi_result (*gsi))
    3794        36335 :                   && (gsi_end_p (gsi2)
    3795        36576 :                       || !virtual_operand_p (gimple_phi_result (*gsi2))))
    3796          172 :                 add_phi_arg (*gsi,
    3797           86 :                              gimple_phi_arg_def_from_edge (*gsi, epilog_e),
    3798              :                              guard_e, UNKNOWN_LOCATION);
    3799              :               else
    3800              :                 {
    3801        36249 :                   add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
    3802              :                                UNKNOWN_LOCATION);
    3803        36249 :                   gsi_next (&gsi2);
    3804              :                 }
    3805              :             }
    3806              : 
    3807              :           /* Only need to handle basic block before epilog loop if it's not
    3808              :              the guard_bb, which is the case when skip_vector is true.  */
    3809        25154 :           if (guard_bb != bb_before_epilog)
    3810              :             {
    3811        25108 :               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
    3812              : 
    3813        25108 :               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
    3814              :             }
    3815        25154 :           scale_loop_profile (epilog, prob_epilog, -1);
    3816              :         }
    3817              : 
    3818              :       /* If we have a peeled vector iteration, all exits are the same, leave it
    3819              :          and so the main exit needs to be treated the same as the alternative
    3820              :          exits in that we leave their updates to vectorizable_live_operations.
    3821              :          */
    3822        32747 :       tree vector_iters_vf = niters_vector_mult_vf;
    3823        32747 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3824              :         {
    3825          825 :           tree tmp_niters_vf
    3826          825 :             = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
    3827              : 
    3828           43 :           if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
    3829          911 :                 && get_loop_exit_edges (loop).length () == 1)
    3830          839 :               && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
    3831              :           {
    3832          615 :             basic_block exit_bb = NULL;
    3833          615 :             edge update_e = NULL;
    3834              : 
    3835              :             /* Identify the early exit merge block.  I wish we had stored
    3836              :                this.  */
    3837         1845 :             for (auto e : get_loop_exit_edges (loop))
    3838          615 :               if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo))
    3839              :                 {
    3840          615 :                   exit_bb = e->dest;
    3841          615 :                   update_e = single_succ_edge (exit_bb);
    3842          615 :                   break;
    3843          615 :                 }
    3844          615 :             vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
    3845              :                                               update_e, true);
    3846              :           }
    3847          825 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    3848              :             vector_iters_vf = tmp_niters_vf;
    3849              : 
    3850          825 :           LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
    3851              :         }
    3852              : 
    3853        32747 :         bool recalculate_peel_niters_init
    3854        32747 :           = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
    3855        32747 :         vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
    3856              :                                           update_e,
    3857              :                                           recalculate_peel_niters_init);
    3858              : 
    3859              :       /* Recalculate the dominators after adding the guard edge.  */
    3860        32747 :       if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3861          825 :         iterate_fix_dominators (CDI_DOMINATORS, doms, false);
    3862              : 
    3863              :       /* When we do not have a loop-around edge to the epilog we know
    3864              :          the vector loop covered at least VF scalar iterations unless
    3865              :          we have early breaks.
    3866              :          Update any known upper bound with this knowledge.  */
    3867        32747 :       if (! skip_vector
    3868         7913 :           && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
    3869              :         {
    3870         7364 :           if (epilog->any_upper_bound)
    3871         7364 :             epilog->nb_iterations_upper_bound -= lowest_vf;
    3872         7364 :           if (epilog->any_likely_upper_bound)
    3873         7364 :             epilog->nb_iterations_likely_upper_bound -= lowest_vf;
    3874         7364 :           if (epilog->any_estimate)
    3875         7362 :             epilog->nb_iterations_estimate -= lowest_vf;
    3876              :         }
    3877              : 
    3878        32747 :       unsigned HOST_WIDE_INT bound;
    3879        32747 :       if (bound_scalar.is_constant (&bound))
    3880              :         {
    3881        32747 :           gcc_assert (bound != 0);
    3882              :           /* Adjust the upper bound by the extra peeled vector iteration if we
    3883              :              are an epilogue of an peeled vect loop and not VLA.  For VLA the
    3884              :              loop bounds are unknown.  */
    3885        65475 :           if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    3886        32747 :               && vf.is_constant ())
    3887           62 :             bound += vf.to_constant ();
    3888              :           /* -1 to convert loop iterations to latch iterations.  */
    3889        32747 :           record_niter_bound (epilog, bound - 1, false, true);
    3890        32747 :           scale_loop_profile (epilog, profile_probability::always (),
    3891              :                               bound - 1);
    3892              :         }
    3893              : 
    3894        32747 :       delete_update_ssa ();
    3895        32747 :       adjust_vec_debug_stmts ();
    3896        32747 :       scev_reset ();
    3897        32747 :     }
    3898              : 
    3899        32782 :   if (vect_epilogues)
    3900              :     {
    3901         6823 :       epilog->aux = epilogue_vinfo;
    3902         6823 :       LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
    3903         6823 :       LOOP_VINFO_MAIN_EXIT (epilogue_vinfo)
    3904         6823 :         = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo);
    3905              : 
    3906         6823 :       loop_constraint_clear (epilog, LOOP_C_INFINITE);
    3907              : 
    3908              :       /* We now must calculate the number of NITERS performed by the previous
    3909              :          loop and EPILOGUE_NITERS to be performed by the epilogue.  */
    3910         6823 :       tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
    3911              :                                  niters_prolog, niters_vector_mult_vf);
    3912              : 
    3913              :       /* If skip_vector we may skip the previous loop, we insert a phi-node to
    3914              :          determine whether we are coming from the previous vectorized loop
    3915              :          using the update_e edge or the skip_vector basic block using the
    3916              :          skip_e edge.  */
    3917         6823 :       if (skip_vector)
    3918              :         {
    3919         5543 :           gcc_assert (update_e != NULL && skip_e != NULL);
    3920         5543 :           gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
    3921              :                                            update_e->dest);
    3922         5543 :           tree new_ssa = make_ssa_name (TREE_TYPE (niters));
    3923         5543 :           gimple *stmt = gimple_build_assign (new_ssa, niters);
    3924         5543 :           gimple_stmt_iterator gsi;
    3925         5543 :           if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
    3926         5543 :               && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
    3927              :             {
    3928         5543 :               gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
    3929         5543 :               gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    3930              :             }
    3931              :           else
    3932              :             {
    3933            0 :               gsi = gsi_last_bb (update_e->src);
    3934            0 :               gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
    3935              :             }
    3936              : 
    3937         5543 :           niters = new_ssa;
    3938         5543 :           add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
    3939         5543 :           add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
    3940              :                        UNKNOWN_LOCATION);
    3941         5543 :           niters = PHI_RESULT (new_phi);
    3942         5543 :           epilogue_vinfo->main_loop_edge = update_e;
    3943         5543 :           epilogue_vinfo->skip_main_loop_edge = skip_e;
    3944              :         }
    3945              : 
    3946              :       /* Set ADVANCE to the number of iterations performed by the previous
    3947              :          loop and its prologue.  */
    3948         6823 :       *advance = niters;
    3949              : 
    3950              :       /* Subtract the number of iterations performed by the vectorized loop
    3951              :          from the number of total iterations.  */
    3952         6823 :       tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
    3953              :                                           before_loop_niters,
    3954              :                                           niters);
    3955              : 
    3956         6823 :       LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
    3957         6823 :       LOOP_VINFO_NITERSM1 (epilogue_vinfo)
    3958         6823 :         = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
    3959              :                        epilogue_niters,
    3960              :                        build_one_cst (TREE_TYPE (epilogue_niters)));
    3961              :     }
    3962              : 
    3963        32782 :   adjust_vec.release ();
    3964        32782 :   free_original_copy_tables ();
    3965              : 
    3966        32782 :   return vect_epilogues ? epilog : NULL;
    3967        32782 : }
    3968              : 
    3969              : /* Function vect_create_cond_for_niters_checks.
    3970              : 
    3971              :    Create a conditional expression that represents the run-time checks for
    3972              :    loop's niter.  The loop is guaranteed to terminate if the run-time
    3973              :    checks hold.
    3974              : 
    3975              :    Input:
    3976              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    3977              :                 with logical AND operation.  If it is NULL, then the function
    3978              :                 is used to return the number of alias checks.
    3979              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    3980              :                 to be checked.
    3981              : 
    3982              :    Output:
    3983              :    COND_EXPR - conditional expression.
    3984              : 
    3985              :    The returned COND_EXPR is the conditional expression to be used in the
    3986              :    if statement that controls which version of the loop gets executed at
    3987              :    runtime.  */
    3988              : 
    3989              : static void
    3990          376 : vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
    3991              : {
    3992          376 :   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
    3993              : 
    3994          376 :   if (*cond_expr)
    3995          376 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    3996              :                               *cond_expr, part_cond_expr);
    3997              :   else
    3998            0 :     *cond_expr = part_cond_expr;
    3999          376 : }
    4000              : 
    4001              : /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    4002              :    and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
    4003              : 
    4004              : static void
    4005          307 : chain_cond_expr (tree *cond_expr, tree part_cond_expr)
    4006              : {
    4007          307 :   if (*cond_expr)
    4008          303 :     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4009              :                               *cond_expr, part_cond_expr);
    4010              :   else
    4011            4 :     *cond_expr = part_cond_expr;
    4012          307 : }
    4013              : 
    4014              : /* Function vect_create_cond_for_align_checks.
    4015              : 
    4016              :    Create a conditional expression that represents the alignment checks for
    4017              :    all of data references (array element references) whose alignment must be
    4018              :    checked at runtime.
    4019              : 
    4020              :    Input:
    4021              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4022              :                 with logical AND operation.
    4023              :    LOOP_VINFO - three fields of the loop information are used.
    4024              :                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
    4025              :                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
    4026              :                 LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT indicates which check applies.
    4027              : 
    4028              :    Output:
    4029              :    COND_EXPR_STMT_LIST - statements needed to construct the conditional
    4030              :                          expression.
    4031              :    The returned value is the conditional expression to be used in the if
    4032              :    statement that controls which version of the loop gets executed at runtime.
    4033              : 
    4034              :    Based on the boolean value of LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT, we decide
    4035              :    which type of check should be applied and create two different expressions
    4036              :    accordingly.
    4037              :      1) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is false, we see if all data refs
    4038              :         to be checked are already aligned to an alignment boundary.  We create
    4039              :         an expression of "(a_1 | a_2 | a_3 | ... | a_n) & mask", where "a_i" is
    4040              :         the address of i'th data reference.
    4041              :      2) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we see if all data refs
    4042              :         can be aligned to a boundary after a certain amount of peeling, in other
    4043              :         words, their addresses have the same bottom bits according to the mask.
    4044              :         We create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask",
    4045              :         where "a_i" is the address of i'th data reference.
    4046              : 
    4047              :    Both algorithms make two assumptions:
    4048              :      1) The number of bytes "n" in a vector is a power of 2.
    4049              :      2) An address "a" is aligned if a%n is zero and that this
    4050              :         test can be done as a&(n-1) == 0.  For example, for 16
    4051              :         byte vectors the test is a&0xf == 0.  */
    4052              : 
    4053              : static void
    4054           45 : vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
    4055              :                                    tree *cond_expr,
    4056              :                                    gimple_seq *cond_expr_stmt_list)
    4057              : {
    4058           45 :   const vec<stmt_vec_info> &may_misalign_stmts
    4059              :     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
    4060           45 :   stmt_vec_info stmt_info;
    4061           45 :   poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
    4062           45 :   tree mask_cst;
    4063           45 :   unsigned int i;
    4064           45 :   tree int_ptrsize_type;
    4065           45 :   char tmp_name[30];
    4066           45 :   tree or_tmp_name = NULL_TREE;
    4067           45 :   tree prev_addr_tmp_name = NULL_TREE;
    4068           45 :   tree and_tmp_name;
    4069           45 :   gimple *and_stmt;
    4070           45 :   tree ptrsize_zero;
    4071           45 :   tree part_cond_expr;
    4072              : 
    4073           45 :   gcc_assert (known_ne (mask, 0U));
    4074              : 
    4075           45 :   int_ptrsize_type = signed_type_for (ptr_type_node);
    4076              : 
    4077              :   /* If LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we should have at least two
    4078              :      datarefs to check the mutual alignment.  */
    4079           45 :   gcc_assert (may_misalign_stmts.length () > 1
    4080              :               || !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo));
    4081              : 
    4082          109 :   FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    4083              :     {
    4084           64 :       gimple_seq new_stmt_list = NULL;
    4085           64 :       tree addr_base;
    4086           64 :       tree addr_tmp_name;
    4087           64 :       tree xor_tmp_name;
    4088           64 :       tree new_or_tmp_name;
    4089           64 :       gimple *addr_stmt, *or_stmt, *xor_stmt;
    4090           64 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4091           64 :       bool negative = tree_int_cst_compare
    4092           64 :         (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
    4093           64 :       tree offset = negative
    4094           64 :         ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
    4095              :                     * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
    4096           64 :         : size_zero_node;
    4097              : 
    4098              :       /* create: addr_tmp = (int)(address_of_first_vector) */
    4099           64 :       addr_base =
    4100           64 :         vect_create_addr_base_for_vector_ref (loop_vinfo,
    4101              :                                               stmt_info, &new_stmt_list,
    4102              :                                               offset);
    4103           64 :       if (new_stmt_list != NULL)
    4104           14 :         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
    4105              : 
    4106           64 :       sprintf (tmp_name, "addr2int%d", i);
    4107           64 :       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
    4108           64 :       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
    4109           64 :       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
    4110              : 
    4111           64 :       if (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo))
    4112              :         {
    4113              :           /* Create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask"
    4114              :              to check mutual alignment.  */
    4115           36 :           if (prev_addr_tmp_name != NULL_TREE)
    4116              :             {
    4117           18 :               sprintf (tmp_name, "xorptrs%d_%d", i - 1, i);
    4118           18 :               xor_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4119              :                                                  tmp_name);
    4120           18 :               xor_stmt = gimple_build_assign (xor_tmp_name, BIT_XOR_EXPR,
    4121              :                                               prev_addr_tmp_name,
    4122              :                                               addr_tmp_name);
    4123           18 :               gimple_seq_add_stmt (cond_expr_stmt_list, xor_stmt);
    4124           18 :               if (or_tmp_name == NULL_TREE)
    4125              :                 {
    4126              :                   /* Create the 1st XOR when the 2nd data ref is seen.  */
    4127              :                   or_tmp_name = xor_tmp_name;
    4128              :                 }
    4129              :               else
    4130              :                 {
    4131              :                   /* Create: or_tmp = or_tmp | new_xor_tmp.  */
    4132            0 :                   sprintf (tmp_name, "orxors%d", i - 1);
    4133            0 :                   new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4134              :                                                         tmp_name);
    4135            0 :                   or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
    4136              :                                                  or_tmp_name, xor_tmp_name);
    4137            0 :                   gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
    4138            0 :                   or_tmp_name = new_or_tmp_name;
    4139              :                 }
    4140              :             }
    4141              :           prev_addr_tmp_name = addr_tmp_name;
    4142              :         }
    4143              :       else
    4144              :         {
    4145              :           /* Create: "(a_1 | a_2 | a_3 | ... | a_n) & mask" to check if all
    4146              :              addresses are already aligned.  */
    4147           28 :           if (or_tmp_name != NULL_TREE)
    4148              :             {
    4149              :               /* Create: or_tmp = or_tmp | addr_tmp.  */
    4150            1 :               sprintf (tmp_name, "orptrs%d", i);
    4151            1 :               new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL,
    4152              :                                                     tmp_name);
    4153            1 :               or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
    4154              :                                              or_tmp_name, addr_tmp_name);
    4155            1 :               gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
    4156            1 :               or_tmp_name = new_or_tmp_name;
    4157              :             }
    4158              :           else
    4159              :             or_tmp_name = addr_tmp_name;
    4160              :         }
    4161              : 
    4162              :     } /* end for i */
    4163              : 
    4164           45 :   mask_cst = build_int_cst (int_ptrsize_type, mask);
    4165              : 
    4166              :   /* create: and_tmp = or_tmp & mask  */
    4167           45 :   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
    4168              : 
    4169           45 :   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
    4170              :                                   or_tmp_name, mask_cst);
    4171           45 :   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
    4172              : 
    4173              :   /* Make and_tmp the left operand of the conditional test against zero.
    4174              :      if and_tmp has a nonzero bit then some address is unaligned.  */
    4175           45 :   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
    4176           45 :   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    4177              :                                 and_tmp_name, ptrsize_zero);
    4178           45 :   chain_cond_expr (cond_expr, part_cond_expr);
    4179           45 : }
    4180              : 
    4181              : /* Function vect_create_cond_for_vla_spec_read.
    4182              : 
    4183              :    Create a conditional expression that represents the run-time checks with
    4184              :    max speculative read amount in VLA modes.  We check two things:
    4185              :      1) if the max speculative read amount exceeds the min page size
    4186              :      2) if the VF is power-of-2 - done by checking the max read amount instead
    4187              : 
    4188              :    Input:
    4189              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4190              :                 with logical AND operation.
    4191              :    LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
    4192              :                 possible speculative read amount in VLA modes.
    4193              : 
    4194              :    Output:
    4195              :    COND_EXPR - conditional expression.
    4196              : 
    4197              :    The returned COND_EXPR is the conditional expression to be used in the
    4198              :    if statement that controls which version of the loop gets executed at
    4199              :    runtime.  */
    4200              : 
    4201              : static void
    4202            0 : vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
    4203              : {
    4204            0 :   poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
    4205            0 :   tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
    4206              : 
    4207              :   /* Both the read amount and the VF must be variants, and the read amount must
    4208              :      be a constant power-of-2 multiple of the VF.  */
    4209            0 :   unsigned HOST_WIDE_INT multiple;
    4210            0 :   gcc_assert (!read_amount_poly.is_constant ()
    4211              :               && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
    4212              :               && constant_multiple_p (read_amount_poly,
    4213              :                                       LOOP_VINFO_VECT_FACTOR (loop_vinfo),
    4214              :                                       &multiple)
    4215              :               && pow2p_hwi (multiple));
    4216              : 
    4217              :   tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
    4218              :   tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
    4219              :   tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
    4220              :                                          (unsigned long) param_min_pagesize);
    4221              : 
    4222              :   /* Create an expression of "amount & (amount - 1) == 0".  */
    4223              :   tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
    4224              :                                 amount, cst_ul_one);
    4225              :   tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
    4226              :                                       amount, amount_m1);
    4227              :   tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
    4228              :                                        amount_and_expr, cst_ul_zero);
    4229              :   chain_cond_expr (cond_expr, powof2_cond_expr);
    4230              : 
    4231              :   /* Create an expression of "amount <= cst_ul_pagesize".  */
    4232              :   tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
    4233              :                                          amount, cst_ul_pagesize);
    4234              :   chain_cond_expr (cond_expr, pagesize_cond_expr);
    4235              : }
    4236              : 
    4237              : /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
    4238              :    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
    4239              :    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
    4240              :    and this new condition are true.  Treat a null *COND_EXPR as "true".  */
    4241              : 
    4242              : static void
    4243         3299 : vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
    4244              : {
    4245         3299 :   const vec<vec_object_pair> &pairs
    4246              :     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    4247         3299 :   unsigned int i;
    4248         3299 :   vec_object_pair *pair;
    4249         3311 :   FOR_EACH_VEC_ELT (pairs, i, pair)
    4250              :     {
    4251           12 :       tree addr1 = build_fold_addr_expr (pair->first);
    4252           12 :       tree addr2 = build_fold_addr_expr (pair->second);
    4253           12 :       tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
    4254              :                                          addr1, addr2);
    4255           12 :       chain_cond_expr (cond_expr, part_cond_expr);
    4256              :     }
    4257         3299 : }
    4258              : 
    4259              : /* Create an expression that is true when all lower-bound conditions for
    4260              :    the vectorized loop are met.  Chain this condition with *COND_EXPR.  */
    4261              : 
    4262              : static void
    4263         3299 : vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
    4264              : {
    4265         3299 :   const vec<vec_lower_bound> &lower_bounds
    4266              :     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    4267         3549 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    4268              :     {
    4269          250 :       tree expr = lower_bounds[i].expr;
    4270          250 :       tree type = unsigned_type_for (TREE_TYPE (expr));
    4271          250 :       expr = fold_convert (type, expr);
    4272          250 :       poly_uint64 bound = lower_bounds[i].min_value;
    4273          250 :       if (!lower_bounds[i].unsigned_p)
    4274              :         {
    4275           91 :           expr = fold_build2 (PLUS_EXPR, type, expr,
    4276              :                               build_int_cstu (type, bound - 1));
    4277           91 :           bound += bound - 1;
    4278              :         }
    4279          250 :       tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
    4280              :                                          build_int_cstu (type, bound));
    4281          250 :       chain_cond_expr (cond_expr, part_cond_expr);
    4282              :     }
    4283         3299 : }
    4284              : 
    4285              : /* Function vect_create_cond_for_alias_checks.
    4286              : 
    4287              :    Create a conditional expression that represents the run-time checks for
    4288              :    overlapping of address ranges represented by a list of data references
    4289              :    relations passed as input.
    4290              : 
    4291              :    Input:
    4292              :    COND_EXPR  - input conditional expression.  New conditions will be chained
    4293              :                 with logical AND operation.  If it is NULL, then the function
    4294              :                 is used to return the number of alias checks.
    4295              :    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
    4296              :                 to be checked.
    4297              : 
    4298              :    Output:
    4299              :    COND_EXPR - conditional expression.
    4300              : 
    4301              :    The returned COND_EXPR is the conditional expression to be used in the if
    4302              :    statement that controls which version of the loop gets executed at runtime.
    4303              : */
    4304              : 
    4305              : void
    4306         3299 : vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
    4307              : {
    4308         3299 :   const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
    4309              :     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    4310              : 
    4311         3299 :   if (comp_alias_ddrs.is_empty ())
    4312              :     return;
    4313              : 
    4314         3170 :   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
    4315              :                                &comp_alias_ddrs, cond_expr);
    4316         3170 :   if (dump_enabled_p ())
    4317         1284 :     dump_printf_loc (MSG_NOTE, vect_location,
    4318              :                      "created %u versioning for alias checks.\n",
    4319              :                      comp_alias_ddrs.length ());
    4320              : }
    4321              : 
    4322              : 
    4323              : /* Function vect_loop_versioning.
    4324              : 
    4325              :    If the loop has data references that may or may not be aligned or/and
    4326              :    has data reference relations whose independence was not proven then
    4327              :    two versions of the loop need to be generated, one which is vectorized
    4328              :    and one which isn't.  A test is then generated to control which of the
    4329              :    loops is executed.  The test checks for the alignment of all of the
    4330              :    data references that may or may not be aligned.  An additional
    4331              :    sequence of runtime tests is generated for each pairs of DDRs whose
    4332              :    independence was not proven.  The vectorized version of loop is
    4333              :    executed only if both alias and alignment tests are passed.
    4334              : 
    4335              :    The test generated to check which version of loop is executed
    4336              :    is modified to also check for profitability as indicated by the
    4337              :    cost model threshold TH.
    4338              : 
    4339              :    The versioning precondition(s) are placed in *COND_EXPR and
    4340              :    *COND_EXPR_STMT_LIST.  */
    4341              : 
    4342              : class loop *
    4343         3757 : vect_loop_versioning (loop_vec_info loop_vinfo,
    4344              :                       gimple *loop_vectorized_call)
    4345              : {
    4346         3757 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
    4347         3757 :   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
    4348         3757 :   basic_block condition_bb;
    4349         3757 :   gphi_iterator gsi;
    4350         3757 :   gimple_stmt_iterator cond_exp_gsi;
    4351         3757 :   basic_block merge_bb;
    4352         3757 :   basic_block new_exit_bb;
    4353         3757 :   edge new_exit_e, e;
    4354         3757 :   gphi *orig_phi, *new_phi;
    4355         3757 :   tree cond_expr = NULL_TREE;
    4356         3757 :   gimple_seq cond_expr_stmt_list = NULL;
    4357         3757 :   tree arg;
    4358         3757 :   profile_probability prob = profile_probability::likely ();
    4359         3757 :   gimple_seq gimplify_stmt_list = NULL;
    4360         3757 :   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
    4361         3757 :   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
    4362         3757 :   bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
    4363         3757 :   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
    4364         3757 :   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
    4365         3757 :   poly_uint64 versioning_threshold
    4366              :     = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
    4367         3757 :   tree version_simd_if_cond
    4368              :     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
    4369         3757 :   unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
    4370         3757 :   bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo);
    4371              : 
    4372         3757 :   if (!uncounted_p && vect_apply_runtime_profitability_check_p (loop_vinfo)
    4373              :       && !ordered_p (th, versioning_threshold))
    4374              :     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4375              :                              build_int_cst (TREE_TYPE (scalar_loop_iters),
    4376              :                                             th - 1));
    4377         3757 :   if (!uncounted_p && maybe_ne (versioning_threshold, 0U))
    4378              :     {
    4379         3744 :       tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
    4380              :                                build_int_cst (TREE_TYPE (scalar_loop_iters),
    4381              :                                               versioning_threshold - 1));
    4382         3744 :       if (cond_expr)
    4383            0 :         cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
    4384              :                                  expr, cond_expr);
    4385              :       else
    4386         3744 :         cond_expr = expr;
    4387              :     }
    4388              : 
    4389         3757 :   tree cost_name = NULL_TREE;
    4390         3757 :   profile_probability prob2 = profile_probability::always ();
    4391         3757 :   if (cond_expr
    4392         3744 :       && EXPR_P (cond_expr)
    4393         2586 :       && (version_niter
    4394         2586 :           || version_align
    4395              :           || version_alias
    4396         2250 :           || version_simd_if_cond))
    4397              :     {
    4398         2586 :       cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4399              :                                                       &cond_expr_stmt_list,
    4400              :                                                       is_gimple_val, NULL_TREE);
    4401              :       /* Split prob () into two so that the overall probability of passing
    4402              :          both the cost-model and versioning checks is the orig prob.  */
    4403         2586 :       prob2 = prob = prob.sqrt ();
    4404              :     }
    4405              : 
    4406         3757 :   if (version_niter)
    4407          376 :     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
    4408              : 
    4409         3757 :   if (cond_expr)
    4410              :     {
    4411         3744 :       gimple_seq tem = NULL;
    4412         3744 :       cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4413              :                                           &tem, is_gimple_condexpr_for_cond,
    4414              :                                           NULL_TREE);
    4415         3744 :       gimple_seq_add_seq (&cond_expr_stmt_list, tem);
    4416              :     }
    4417              : 
    4418         3757 :   if (version_align)
    4419           45 :     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
    4420              :                                        &cond_expr_stmt_list);
    4421              : 
    4422         3757 :   if (version_spec_read)
    4423            0 :     vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
    4424              : 
    4425         3757 :   if (version_alias)
    4426              :     {
    4427         3299 :       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
    4428         3299 :       vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
    4429         3299 :       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    4430              :     }
    4431              : 
    4432         3757 :   if (version_simd_if_cond)
    4433              :     {
    4434           58 :       gcc_assert (dom_info_available_p (CDI_DOMINATORS));
    4435           58 :       if (flag_checking)
    4436           58 :         if (basic_block bb
    4437           58 :             = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
    4438           58 :           gcc_assert (bb != loop->header
    4439              :                       && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
    4440              :                       && (scalar_loop == NULL
    4441              :                           || (bb != scalar_loop->header
    4442              :                               && dominated_by_p (CDI_DOMINATORS,
    4443              :                                                  scalar_loop->header, bb))));
    4444           58 :       tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
    4445           58 :       tree c = fold_build2 (NE_EXPR, boolean_type_node,
    4446              :                             version_simd_if_cond, zero);
    4447           58 :       if (cond_expr)
    4448           58 :         cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
    4449              :                                  c, cond_expr);
    4450              :       else
    4451            0 :         cond_expr = c;
    4452           58 :       if (dump_enabled_p ())
    4453            5 :         dump_printf_loc (MSG_NOTE, vect_location,
    4454              :                          "created versioning for simd if condition check.\n");
    4455              :     }
    4456              : 
    4457         3757 :   cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
    4458              :                                       &gimplify_stmt_list,
    4459              :                                       is_gimple_condexpr_for_cond, NULL_TREE);
    4460         3757 :   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
    4461              : 
    4462              :   /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
    4463              :      invariant in.  */
    4464         3757 :   class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
    4465         3757 :   for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
    4466        67985 :        !gsi_end_p (gsi); gsi_next (&gsi))
    4467              :     {
    4468        64228 :       gimple *stmt = gsi_stmt (gsi);
    4469        64228 :       update_stmt (stmt);
    4470        64228 :       ssa_op_iter iter;
    4471        64228 :       use_operand_p use_p;
    4472        64228 :       basic_block def_bb;
    4473       147679 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    4474        83451 :         if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
    4475        83451 :             && flow_bb_inside_loop_p (outermost, def_bb))
    4476         2515 :           outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
    4477              :     }
    4478              : 
    4479              :   /* Search for the outermost loop we can version.  Avoid versioning of
    4480              :      non-perfect nests but allow if-conversion versioned loops inside.  */
    4481         3757 :   class loop *loop_to_version = loop;
    4482         3757 :   if (flow_loop_nested_p (outermost, loop))
    4483              :     {
    4484         1454 :       if (dump_enabled_p ())
    4485          676 :         dump_printf_loc (MSG_NOTE, vect_location,
    4486              :                          "trying to apply versioning to outer loop %d\n",
    4487              :                          outermost->num);
    4488         1454 :       if (outermost->num == 0)
    4489         1362 :         outermost = superloop_at_depth (loop, 1);
    4490              :       /* And avoid applying versioning on non-perfect nests.  */
    4491              :       while (loop_to_version != outermost
    4492          118 :              && (e = single_exit (loop_outer (loop_to_version)))
    4493           99 :              && !(e->flags & EDGE_COMPLEX)
    4494           98 :              && (!loop_outer (loop_to_version)->inner->next
    4495           53 :                  || vect_loop_vectorized_call (loop_to_version))
    4496         1554 :              && (!loop_outer (loop_to_version)->inner->next
    4497            5 :                  || !loop_outer (loop_to_version)->inner->next->next))
    4498              :         loop_to_version = loop_outer (loop_to_version);
    4499              :     }
    4500              : 
    4501              :   /* Apply versioning.  If there is already a scalar version created by
    4502              :      if-conversion re-use that.  Note we cannot re-use the copy of
    4503              :      an if-converted outer-loop when vectorizing the inner loop only.  */
    4504         3757 :   gcond *cond;
    4505         3757 :   if ((!loop_to_version->inner || loop == loop_to_version)
    4506         3719 :       && loop_vectorized_call)
    4507              :     {
    4508           88 :       gcc_assert (scalar_loop);
    4509           88 :       condition_bb = gimple_bb (loop_vectorized_call);
    4510          176 :       cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
    4511           88 :       gimple_cond_set_condition_from_tree (cond, cond_expr);
    4512           88 :       update_stmt (cond);
    4513              : 
    4514           88 :       if (cond_expr_stmt_list)
    4515              :         {
    4516           88 :           cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
    4517           88 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4518              :                                  GSI_SAME_STMT);
    4519              :         }
    4520              : 
    4521              :       /* if-conversion uses profile_probability::always () for both paths,
    4522              :          reset the paths probabilities appropriately.  */
    4523           88 :       edge te, fe;
    4524           88 :       extract_true_false_edges_from_block (condition_bb, &te, &fe);
    4525           88 :       te->probability = prob;
    4526           88 :       fe->probability = prob.invert ();
    4527              :       /* We can scale loops counts immediately but have to postpone
    4528              :          scaling the scalar loop because we re-use it during peeling.
    4529              : 
    4530              :          Ifcvt duplicates loop preheader, loop body and produces an basic
    4531              :          block after loop exit.  We need to scale all that.  */
    4532           88 :       basic_block preheader = loop_preheader_edge (loop_to_version)->src;
    4533           88 :       preheader->count = preheader->count.apply_probability (prob * prob2);
    4534           88 :       scale_loop_frequencies (loop_to_version, prob * prob2);
    4535              :       /* When the loop has multiple exits then we can only version itself.
    4536              :         This is denoted by loop_to_version == loop.  In this case we can
    4537              :         do the versioning by selecting the exit edge the vectorizer is
    4538              :         currently using.  */
    4539           88 :       edge exit_edge;
    4540           88 :       if (loop_to_version == loop)
    4541           88 :        exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4542              :       else
    4543            0 :        exit_edge = single_exit (loop_to_version);
    4544           88 :       exit_edge->dest->count = preheader->count;
    4545           88 :       LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
    4546              : 
    4547           88 :       nloop = scalar_loop;
    4548           88 :       if (dump_enabled_p ())
    4549           90 :         dump_printf_loc (MSG_NOTE, vect_location,
    4550              :                          "reusing %sloop version created by if conversion\n",
    4551              :                          loop_to_version != loop ? "outer " : "");
    4552           88 :     }
    4553              :   else
    4554              :     {
    4555         3669 :       if (loop_to_version != loop
    4556         3669 :           && dump_enabled_p ())
    4557           14 :         dump_printf_loc (MSG_NOTE, vect_location,
    4558              :                          "applying loop versioning to outer loop %d\n",
    4559              :                          loop_to_version->num);
    4560              : 
    4561         3669 :       unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
    4562              : 
    4563         3669 :       initialize_original_copy_tables ();
    4564         7338 :       nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
    4565         3669 :                             prob * prob2, (prob * prob2).invert (),
    4566         3669 :                             prob * prob2, (prob * prob2).invert (),
    4567              :                             true);
    4568              : 
    4569              :       /* If the PHI nodes in the loop header were reallocated, we need to fix up
    4570              :          our internally stashed copies of those.  */
    4571         3669 :       if (loop_to_version == loop)
    4572         3631 :         for (auto gsi = gsi_start_phis (loop->header);
    4573        14486 :              !gsi_end_p (gsi); gsi_next (&gsi))
    4574        10855 :           loop_vinfo->resync_stmt_addr (gsi.phi ());
    4575              : 
    4576              :       /* We will later insert second conditional so overall outcome of
    4577              :          both is prob * prob2.  */
    4578         3669 :       edge true_e, false_e;
    4579         3669 :       extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
    4580         3669 :       true_e->probability = prob;
    4581         3669 :       false_e->probability = prob.invert ();
    4582         3669 :       gcc_assert (nloop);
    4583         3669 :       nloop = get_loop_copy (loop);
    4584              : 
    4585              :       /* Assign hierarchical discriminators to distinguish loop versions.
    4586              :          Only assign to the scalar version here; the vectorized version will
    4587              :          get discriminators later during transformation/peeling.
    4588              :          Use dynamic copy_id allocation instead of hardcoded constants.  */
    4589         3669 :       gimple *nloop_last = last_nondebug_stmt (nloop->header);
    4590         3669 :       location_t nloop_loc
    4591         3669 :         = nloop_last ? gimple_location (nloop_last) : UNKNOWN_LOCATION;
    4592         3669 :       if (nloop_loc != UNKNOWN_LOCATION)
    4593              :         {
    4594         3208 :           unsigned int nloop_copyid = allocate_copyid_base (nloop_loc, 1);
    4595         3208 :           assign_discriminators_to_loop (nloop, 0, nloop_copyid);
    4596              :         }
    4597              :       /* For cycle vectorization with SLP we rely on the PHI arguments
    4598              :          appearing in the same order as the SLP node operands which for the
    4599              :          loop PHI nodes means the preheader edge dest index needs to remain
    4600              :          the same for the analyzed loop which also becomes the vectorized one.
    4601              :          Make it so in case the state after versioning differs by redirecting
    4602              :          the first edge into the header to the same destination which moves
    4603              :          it last.  */
    4604         3669 :       if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
    4605              :         {
    4606          292 :           edge e = EDGE_PRED (loop->header, 0);
    4607          292 :           ssa_redirect_edge (e, e->dest);
    4608          292 :           flush_pending_stmts (e);
    4609              :         }
    4610         3669 :       gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
    4611              : 
    4612              :       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
    4613              :          reap those otherwise;  they also refer to the original
    4614              :          loops.  */
    4615              :       class loop *l = loop;
    4616         3674 :       while (gimple *call = vect_loop_vectorized_call (l))
    4617              :         {
    4618            5 :           call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
    4619            5 :           fold_loop_internal_call (call, boolean_false_node);
    4620            5 :           l = loop_outer (l);
    4621            5 :         }
    4622         3669 :       free_original_copy_tables ();
    4623              : 
    4624         3669 :       if (cond_expr_stmt_list)
    4625              :         {
    4626         3588 :           cond_exp_gsi = gsi_last_bb (condition_bb);
    4627         3588 :           gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
    4628              :                                  GSI_SAME_STMT);
    4629              :         }
    4630              : 
    4631              :       /* Loop versioning violates an assumption we try to maintain during
    4632              :          vectorization - that the loop exit block has a single predecessor.
    4633              :          After versioning, the exit block of both loop versions is the same
    4634              :          basic block (i.e. it has two predecessors). Just in order to simplify
    4635              :          following transformations in the vectorizer, we fix this situation
    4636              :          here by adding a new (empty) block on the exit-edge of the loop,
    4637              :          with the proper loop-exit phis to maintain loop-closed-form.
    4638              :          If loop versioning wasn't done from loop, but scalar_loop instead,
    4639              :          merge_bb will have already just a single successor.  */
    4640              : 
    4641              :       /* When the loop has multiple exits then we can only version itself.
    4642              :          This is denoted by loop_to_version == loop.  In this case we can
    4643              :          do the versioning by selecting the exit edge the vectorizer is
    4644              :          currently using.  */
    4645         3669 :       edge exit_edge;
    4646         3669 :       if (loop_to_version == loop)
    4647         3631 :         exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo);
    4648              :       else
    4649           38 :         exit_edge = single_exit (loop_to_version);
    4650              : 
    4651         3669 :       gcc_assert (exit_edge);
    4652         3669 :       merge_bb = exit_edge->dest;
    4653         3669 :       if (EDGE_COUNT (merge_bb->preds) >= 2)
    4654              :         {
    4655         3669 :           gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
    4656         3669 :           new_exit_bb = split_edge (exit_edge);
    4657         3669 :           new_exit_e = exit_edge;
    4658         3669 :           e = EDGE_SUCC (new_exit_bb, 0);
    4659              : 
    4660         7585 :           for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
    4661         3916 :                gsi_next (&gsi))
    4662              :             {
    4663         3916 :               tree new_res;
    4664         3916 :               orig_phi = gsi.phi ();
    4665         3916 :               new_res = copy_ssa_name (PHI_RESULT (orig_phi));
    4666         3916 :               new_phi = create_phi_node (new_res, new_exit_bb);
    4667         3916 :               arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
    4668         3916 :               add_phi_arg (new_phi, arg, new_exit_e,
    4669              :                            gimple_phi_arg_location_from_edge (orig_phi, e));
    4670         3916 :               adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    4671              :             }
    4672              :         }
    4673              : 
    4674         3669 :       update_ssa (TODO_update_ssa_no_phi);
    4675              :     }
    4676              : 
    4677              :   /* Split the cost model check off to a separate BB.  Costing assumes
    4678              :      this is the only thing we perform when we enter the scalar loop
    4679              :      from a failed cost decision.  */
    4680         3757 :   if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
    4681              :     {
    4682         2586 :       gimple *def = SSA_NAME_DEF_STMT (cost_name);
    4683         2586 :       gcc_assert (gimple_bb (def) == condition_bb);
    4684              :       /* All uses of the cost check are 'true' after the check we
    4685              :          are going to insert.  */
    4686         2586 :       replace_uses_by (cost_name, boolean_true_node);
    4687              :       /* And we're going to build the new single use of it.  */
    4688         2586 :       gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
    4689              :                                        NULL_TREE, NULL_TREE);
    4690         2586 :       edge e = split_block (gimple_bb (def), def);
    4691         2586 :       gimple_stmt_iterator gsi = gsi_for_stmt (def);
    4692         2586 :       gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
    4693         2586 :       edge true_e, false_e;
    4694         2586 :       extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
    4695         2586 :       e->flags &= ~EDGE_FALLTHRU;
    4696         2586 :       e->flags |= EDGE_TRUE_VALUE;
    4697         2586 :       edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
    4698         2586 :       e->probability = prob2;
    4699         2586 :       e2->probability = prob2.invert ();
    4700         2586 :       e->dest->count = e->count ();
    4701         2586 :       set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
    4702         2586 :       auto_vec<basic_block, 3> adj;
    4703         2586 :       for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
    4704         7662 :            son;
    4705         5076 :            son = next_dom_son (CDI_DOMINATORS, son))
    4706         7566 :         if (EDGE_COUNT (son->preds) > 1)
    4707         2490 :           adj.safe_push (son);
    4708        10248 :       for (auto son : adj)
    4709         2490 :         set_immediate_dominator (CDI_DOMINATORS, son, e->src);
    4710              :       //debug_bb (condition_bb);
    4711              :       //debug_bb (e->src);
    4712         2586 :     }
    4713              : 
    4714         3757 :   if (version_niter)
    4715              :     {
    4716              :       /* The versioned loop could be infinite, we need to clear existing
    4717              :          niter information which is copied from the original loop.  */
    4718          376 :       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
    4719          376 :       vect_free_loop_info_assumptions (nloop);
    4720              :     }
    4721              : 
    4722         3757 :   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
    4723         3757 :       && dump_enabled_p ())
    4724              :     {
    4725          818 :       if (version_alias)
    4726          744 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4727              :                          vect_location,
    4728              :                          "loop versioned for vectorization because of "
    4729              :                          "possible aliasing\n");
    4730          818 :       if (version_align)
    4731           30 :         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
    4732              :                          vect_location,
    4733              :                          "loop versioned for vectorization to enhance "
    4734              :                          "alignment\n");
    4735              : 
    4736              :     }
    4737              : 
    4738         3757 :   return nloop;
    4739              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.